/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hwpmc_hooks.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/elf.h>
#include <sys/filedesc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/vmmeter.h>
#if defined(__amd64__) || defined(__i386__) /* for i386_read_exec */
#include <machine/md_var.h>
#endif

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vnode_pager.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

int old_mlock = 0;
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
    "Do not apply RLIMIT_MEMLOCK on mlockall");
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
    "mincore reports mappings, not residency");
static int imply_prot_max = 0;
SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0,
    "Imply maximum page protections in mmap() when none are specified");

#ifdef MAP_32BIT
#define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
#endif

_Static_assert(MAXPAGESIZES <= 4, "MINCORE_SUPER too narrow");

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

int
sys_sbrk(struct thread *td, struct sbrk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

int
sys_sstk(struct thread *td, struct sstk_args *uap)
{
	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43)
int
ogetpagesize(struct thread *td, struct ogetpagesize_args *uap)
{

	td->td_retval[0] = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 */
/*
 * Memory Map (mmap) system call.  Note that the file offset
 * and address are allowed to be NOT page aligned, though if
 * the MAP_FIXED flag is set, both must have the same remainder
 * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
 * page-aligned, the actual mapping starts at trunc_page(addr)
 * and the return value is adjusted up by the page offset.
 *
 * Generally speaking, only character devices which are themselves
 * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
 * there would be no cache coherency between a descriptor and a VM mapping
 * both to the same character device.
 */
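
/*
 * Illustrative userland usage (a sketch, not part of the kernel code):
 * a private anonymous mapping passes fd -1 and offset 0, while a fixed
 * file mapping must keep addr and offset congruent modulo PAGE_SIZE:
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	q = mmap(hint, len, PROT_READ, MAP_FIXED | MAP_SHARED, fd, off);
 *
 * Here "hint", "fd", and "off" are hypothetical caller-supplied values.
 */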
#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	void *addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
sys_mmap(struct thread *td, struct mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}

int
kern_mmap_maxprot(struct proc *p, int prot)
{

	if ((p->p_flag2 & P2_PROTMAX_DISABLE) != 0 ||
	    (p->p_fctl0 & NT_FREEBSD_FCTL_PROTMAX_DISABLE) != 0)
		return (_PROT_ALL);
	if (((p->p_flag2 & P2_PROTMAX_ENABLE) != 0 || imply_prot_max) &&
	    prot != PROT_NONE)
		return (prot);
	return (_PROT_ALL);
}
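
/*
 * Illustrative use of PROT_MAX (a sketch, not part of the kernel code):
 * a caller can cap the protections a mapping may ever be upgraded to,
 * e.g. readable now, at most read/write later, never executable:
 *
 *	p = mmap(NULL, len, PROT_READ | PROT_MAX(PROT_READ | PROT_WRITE),
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 * A later mprotect(p, len, PROT_EXEC) would then fail.
 */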

int
kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags,
    int fd, off_t pos)
{
	struct mmap_req mr = {
		.mr_hint = addr0,
		.mr_len = len,
		.mr_prot = prot,
		.mr_flags = flags,
		.mr_fd = fd,
		.mr_pos = pos
	};

	return (kern_mmap_req(td, &mr));
}

int
kern_mmap_req(struct thread *td, const struct mmap_req *mrp)
{
	struct vmspace *vms;
	struct file *fp;
	struct proc *p;
	off_t pos;
	vm_offset_t addr, orig_addr;
	vm_size_t len, pageoff, size;
	vm_prot_t cap_maxprot;
	int align, error, fd, flags, max_prot, prot;
	cap_rights_t rights;
	mmap_check_fp_fn check_fp_fn;

	orig_addr = addr = mrp->mr_hint;
	len = mrp->mr_len;
	prot = mrp->mr_prot;
	flags = mrp->mr_flags;
	fd = mrp->mr_fd;
	pos = mrp->mr_pos;
	check_fp_fn = mrp->mr_check_fp_fn;

	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	if (max_prot != 0 && (max_prot & prot) != prot)
		return (ENOTSUP);

	p = td->td_proc;

	/*
	 * Always honor PROT_MAX if set.  If not, default to all
	 * permissions unless we're implying maximum permissions.
	 */
	if (max_prot == 0)
		max_prot = kern_mmap_maxprot(p, prot);

	vms = p->p_vmspace;
	fp = NULL;
	AUDIT_ARG_FD(fd);

	/*
	 * Ignore old flags that used to be defined but did not do anything.
	 */
	flags &= ~(MAP_RESERVED0020 | MAP_RESERVED0040);

	/*
	 * Enforce the constraints.
	 * Mapping of length 0 is only allowed for old binaries.
	 * An anonymous mapping shall specify -1 as the file descriptor
	 * and zero position for new code.  Be nice to ancient a.out
	 * binaries and correct pos for anonymous mappings, since old
	 * ld.so sometimes issues anonymous map requests with non-zero
	 * pos.
	 */
	if (!SV_CURPROC_FLAG(SV_AOUT)) {
		if ((len == 0 && p->p_osrel >= P_OSREL_MAP_ANON) ||
		    ((flags & MAP_ANON) != 0 && (fd != -1 || pos != 0)))
			return (EINVAL);
	} else {
		if ((flags & MAP_ANON) != 0)
			pos = 0;
	}

	if (flags & MAP_STACK) {
		if ((fd != -1) ||
		    ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
			return (EINVAL);
		flags |= MAP_ANON;
		pos = 0;
	}
	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
	    MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
	    MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0)
		return (EINVAL);
	if ((flags & (MAP_EXCL | MAP_FIXED)) == MAP_EXCL)
		return (EINVAL);
	if ((flags & (MAP_SHARED | MAP_PRIVATE)) == (MAP_SHARED | MAP_PRIVATE))
		return (EINVAL);
	if (prot != PROT_NONE &&
	    (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
		return (EINVAL);
	if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
	    pos != 0 || (flags & ~(MAP_FIXED | MAP_GUARD | MAP_EXCL |
#ifdef MAP_32BIT
	    MAP_32BIT |
#endif
	    MAP_ALIGNMENT_MASK)) != 0))
		return (EINVAL);
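
	/*
	 * Illustrative note (a sketch, not part of the kernel code): the
	 * checks above mean a guard region can only be requested as, e.g.,
	 *
	 *	g = mmap(NULL, len, PROT_NONE, MAP_GUARD, -1, 0);
	 *
	 * i.e. no protections, no descriptor, zero offset; it reserves
	 * address space that later mappings will not implicitly overlap.
	 */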

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (pos & PAGE_MASK);
	pos -= pageoff;

	/* Compute size from len by rounding (on both ends). */
	size = len + pageoff;			/* low end... */
	size = round_page(size);		/* hi end */
	/* Check for rounding up to zero. */
	if (len > size)
		return (ENOMEM);
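
	/*
	 * Worked example (illustrative): with PAGE_SIZE 4096, a request
	 * with pos 0x12345 and len 0x1000 yields pageoff 0x345,
	 * pos 0x12000, and size round_page(0x1345) = 0x2000, so the
	 * mapping covers two pages and the returned address is bumped
	 * up by pageoff.  "len > size" only holds when len + pageoff
	 * overflowed and rounded down to zero, hence ENOMEM.
	 */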

	/* Ensure alignment is at least a page and fits in a pointer. */
	align = flags & MAP_ALIGNMENT_MASK;
	if (align != 0 && align != MAP_ALIGNED_SUPER &&
	    (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY ||
	    align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT))
		return (EINVAL);
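
	/*
	 * Illustrative note (a sketch): userland encodes the requested
	 * alignment as a log2 value via MAP_ALIGNED(), e.g.
	 *
	 *	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	 *	    MAP_ANON | MAP_PRIVATE | MAP_ALIGNED(21), -1, 0);
	 *
	 * asks for 2MB (1 << 21) alignment; MAP_ALIGNED_SUPER instead
	 * lets the kernel pick an alignment suited to superpages.
	 */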

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		addr -= pageoff;
		if (addr & PAGE_MASK)
			return (EINVAL);

		/* Address range must be all in user VM space. */
		if (!vm_map_range_valid(&vms->vm_map, addr, addr + size))
			return (EINVAL);
#ifdef MAP_32BIT
		if (flags & MAP_32BIT && addr + size > MAP_32BIT_MAX_ADDR)
			return (EINVAL);
	} else if (flags & MAP_32BIT) {
		/*
		 * For MAP_32BIT, override the hint if it is too high and
		 * do not bother moving the mapping past the heap (since
		 * the heap is usually above 2GB).
		 */
		if (addr + size > MAP_32BIT_MAX_ADDR)
			addr = 0;
#endif
	} else {
		/*
		 * XXX for non-fixed mappings where no hint is provided or
		 * the hint would fall in the potential heap space,
		 * place it after the end of the largest possible heap.
		 *
		 * There should really be a pmap call to determine a reasonable
		 * location.
		 */
		if (addr == 0 ||
		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
		    addr < round_page((vm_offset_t)vms->vm_daddr +
		    lim_max(td, RLIMIT_DATA))))
			addr = round_page((vm_offset_t)vms->vm_daddr +
			    lim_max(td, RLIMIT_DATA));
	}
	if (len == 0) {
		/*
		 * Return success without mapping anything for old
		 * binaries that request a page-aligned mapping of
		 * length 0.  For modern binaries, this function
		 * returns an error earlier.
		 */
		error = 0;
	} else if ((flags & MAP_GUARD) != 0) {
		error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
		    VM_PROT_NONE, flags, NULL, pos, FALSE, td);
	} else if ((flags & MAP_ANON) != 0) {
		/*
		 * Mapping blank space is trivial.
		 *
		 * This relies on VM_PROT_* matching PROT_*.
		 */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    max_prot, flags, NULL, pos, FALSE, td);
	} else {
		/*
		 * Mapping file, get fp for validation and don't let the
		 * descriptor disappear on us if we block. Check capability
		 * rights, but also return the maximum rights to be combined
		 * with maxprot later.
		 */
		cap_rights_init_one(&rights, CAP_MMAP);
		if (prot & PROT_READ)
			cap_rights_set_one(&rights, CAP_MMAP_R);
		if ((flags & MAP_SHARED) != 0) {
			if (prot & PROT_WRITE)
				cap_rights_set_one(&rights, CAP_MMAP_W);
		}
		if (prot & PROT_EXEC)
			cap_rights_set_one(&rights, CAP_MMAP_X);
		error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp);
		if (error != 0)
			goto done;
		if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 &&
		    p->p_osrel >= P_OSREL_MAP_FSTRICT) {
			error = EINVAL;
			goto done;
		}
		if (check_fp_fn != NULL) {
			error = check_fp_fn(fp, prot, max_prot & cap_maxprot,
			    flags);
			if (error != 0)
				goto done;
		}
		if (fp->f_ops == &shm_ops && shm_largepage(fp->f_data))
			addr = orig_addr;
		/* This relies on VM_PROT_* matching PROT_*. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    max_prot & cap_maxprot, flags, pos, td);
	}

	if (error == 0)
		td->td_retval[0] = (register_t) (addr + pageoff);
done:
	if (fp)
		fdrop(fp, td);

	return (error);
}

#if defined(COMPAT_FREEBSD6)
int
freebsd6_mmap(struct thread *td, struct freebsd6_mmap_args *uap)
{

	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pos));
}
#endif

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(struct thread *td, struct ommap_args *uap)
{
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};
	int flags, prot;

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
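
	/*
	 * Illustrative note: in the old 4.3BSD encoding decoded by
	 * cvtbsdprot[] above, bit 0x1 meant execute, 0x2 write, and 0x4
	 * read, so e.g. an old prot of 0x6 translates to
	 * PROT_WRITE | PROT_READ.
	 */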

	prot = cvtbsdprot[uap->prot & 0x7];
#if (defined(COMPAT_FREEBSD32) && defined(__amd64__)) || defined(__i386__)
	if (i386_read_exec && SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
	    prot != 0)
		prot |= PROT_EXEC;
#endif
	flags = 0;
	if (uap->flags & OMAP_ANON)
		flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		flags |= MAP_SHARED;
	else
		flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		flags |= MAP_FIXED;
	return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, flags,
	    uap->fd, uap->pos));
}
#endif				/* COMPAT_43 */

#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	void *addr;
	size_t len;
	int flags;
};
#endif
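
/*
 * Illustrative userland usage (a sketch, not part of the kernel code):
 * after modifying a shared file mapping, a process can force the dirty
 * pages out to the backing object with
 *
 *	msync(p, len, MS_SYNC);
 *
 * while MS_ASYNC starts the writes without waiting and MS_INVALIDATE
 * additionally discards cached pages; the latter two cannot be combined.
 */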
int
sys_msync(struct thread *td, struct msync_args *uap)
{

	return (kern_msync(td, (uintptr_t)uap->addr, uap->len, uap->flags));
}

int
kern_msync(struct thread *td, uintptr_t addr0, size_t size, int flags)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &td->td_proc->p_vmspace->vm_map;

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_sync(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
		return (ENOMEM);
	case KERN_INVALID_ARGUMENT:
		return (EBUSY);
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	void *addr;
	size_t len;
};
#endif
int
sys_munmap(struct thread *td, struct munmap_args *uap)
{

	return (kern_munmap(td, (uintptr_t)uap->addr, uap->len));
}

int
kern_munmap(struct thread *td, uintptr_t addr0, size_t size)
{
#ifdef HWPMC_HOOKS
	struct pmckern_map_out pkm;
	vm_map_entry_t entry;
	bool pmc_handled;
#endif
	vm_offset_t addr, end;
	vm_size_t pageoff;
	vm_map_t map;
	int rv;

	if (size == 0)
		return (EINVAL);

	addr = addr0;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	end = addr + size;
	map = &td->td_proc->p_vmspace->vm_map;
	if (!vm_map_range_valid(map, addr, end))
		return (EINVAL);

	vm_map_lock(map);
#ifdef HWPMC_HOOKS
	pmc_handled = false;
	if (PMC_HOOK_INSTALLED(PMC_FN_MUNMAP)) {
		pmc_handled = true;
		/*
		 * Inform hwpmc if the address range being unmapped contains
		 * an executable region.
		 */
		pkm.pm_address = (uintptr_t) NULL;
		if (vm_map_lookup_entry(map, addr, &entry)) {
			for (; entry->start < end;
			    entry = vm_map_entry_succ(entry)) {
				if (vm_map_check_protection(map, entry->start,
					entry->end, VM_PROT_EXECUTE) == TRUE) {
					pkm.pm_address = (uintptr_t) addr;
					pkm.pm_size = (size_t) size;
					break;
				}
			}
		}
	}
#endif
	rv = vm_map_delete(map, addr, end);

#ifdef HWPMC_HOOKS
	if (rv == KERN_SUCCESS && __predict_false(pmc_handled)) {
		/* downgrade the lock to prevent a LOR with the pmc-sx lock */
		vm_map_lock_downgrade(map);
		if (pkm.pm_address != (uintptr_t) NULL)
			PMC_CALL_HOOK(td, PMC_FN_MUNMAP, (void *) &pkm);
		vm_map_unlock_read(map);
	} else
#endif
		vm_map_unlock(map);

	return (vm_mmap_to_errno(rv));
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	const void *addr;
	size_t len;
	int prot;
};
#endif
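
/*
 * Illustrative userland usage (a sketch): dropping write access from a
 * region after initializing it,
 *
 *	mprotect(p, len, PROT_READ);
 *
 * subject to the PROT_MAX limit established when the region was mapped.
 */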
int
sys_mprotect(struct thread *td, struct mprotect_args *uap)
{

	return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot));
}

int
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
	vm_offset_t addr;
	vm_size_t pageoff;
	int vm_error, max_prot;
	int flags;

	addr = addr0;
	if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
		return (EINVAL);
	max_prot = PROT_MAX_EXTRACT(prot);
	prot = PROT_EXTRACT(prot);
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		if (((addr + size) & 0xffffffff) < addr)
			return (EINVAL);
	} else
#endif
	if (addr + size < addr)
		return (EINVAL);

	flags = VM_MAP_PROTECT_SET_PROT;
	if (max_prot != 0)
		flags |= VM_MAP_PROTECT_SET_MAXPROT;
	vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
	    addr, addr + size, prot, max_prot, flags);

	switch (vm_error) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_RESOURCE_SHORTAGE:
		return (ENOMEM);
	case KERN_OUT_OF_BOUNDS:
		return (ENOTSUP);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	void *addr;
	size_t len;
	int inherit;
};
#endif
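
/*
 * Illustrative userland usage (a sketch): a process can keep a region
 * from being duplicated into its children with
 *
 *	minherit(p, len, INHERIT_NONE);
 *
 * or have parent and child share it with INHERIT_SHARE.
 */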
int
sys_minherit(struct thread *td, struct minherit_args *uap)
{

	return (kern_minherit(td, (uintptr_t)uap->addr, uap->len,
	    uap->inherit));
}

int
kern_minherit(struct thread *td, uintptr_t addr0, size_t len, int inherit0)
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_inherit_t inherit;

	addr = (vm_offset_t)addr0;
	size = len;
	inherit = inherit0;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return (EINVAL);

	switch (vm_map_inherit(&td->td_proc->p_vmspace->vm_map, addr,
	    addr + size, inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	void *addr;
	size_t len;
	int behav;
};
#endif
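
/*
 * Illustrative userland usage (a sketch): advising the VM system that a
 * region will not be needed soon,
 *
 *	madvise(p, len, MADV_DONTNEED);
 *
 * other behaviors include MADV_WILLNEED, MADV_FREE, and the
 * MADV_PROTECT special case handled below.
 */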

int
sys_madvise(struct thread *td, struct madvise_args *uap)
{

	return (kern_madvise(td, (uintptr_t)uap->addr, uap->len, uap->behav));
}

int
kern_madvise(struct thread *td, uintptr_t addr0, size_t len, int behav)
{
	vm_map_t map;
	vm_offset_t addr, end, start;
	int flags;

	/*
	 * Check for our special case, advising the swap pager we are
	 * "immortal."
	 */
	if (behav == MADV_PROTECT) {
		flags = PPROT_SET;
		return (kern_procctl(td, P_PID, td->td_proc->p_pid,
		    PROC_SPROTECT, &flags));
	}

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	map = &td->td_proc->p_vmspace->vm_map;
	addr = addr0;
	if (!vm_map_range_valid(map, addr, addr + len))
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page(addr);
	end = round_page(addr + len);

	/*
	 * vm_map_madvise() checks for illegal values of behav.
	 */
	return (vm_map_madvise(map, start, end, behav));
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	const void *addr;
	size_t len;
	char *vec;
};
#endif
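
/*
 * Illustrative userland usage (a sketch): residency is reported one
 * byte per page into a caller-supplied vector,
 *
 *	mincore(p, len, vec);
 *
 * where vec is a hypothetical buffer of at least len / PAGE_SIZE bytes;
 * each byte carries MINCORE_INCORE and the referenced/modified bits
 * filled in below.
 */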

int
sys_mincore(struct thread *td, struct mincore_args *uap)
{

	return (kern_mincore(td, (uintptr_t)uap->addr, uap->len, uap->vec));
}

int
kern_mincore(struct thread *td, uintptr_t addr0, size_t len, char *vec)
{
	pmap_t pmap;
	vm_map_t map;
	vm_map_entry_t current, entry;
	vm_object_t object;
	vm_offset_t addr, cend, end, first_addr;
	vm_paddr_t pa;
	vm_page_t m;
	vm_pindex_t pindex;
	int error, lastvecindex, mincoreinfo, vecindex;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page(addr0);
	end = round_page(addr0 + len);
	map = &td->td_proc->p_vmspace->vm_map;
	if (end > vm_map_max(map) || end < addr)
		return (ENOMEM);

	pmap = vmspace_pmap(td->td_proc->p_vmspace);

	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		vm_map_unlock_read(map);
		return (ENOMEM);
	}

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	while (entry->start < end) {
		/*
		 * check for contiguity
		 */
		current = entry;
		entry = vm_map_entry_succ(current);
		if (current->end < end &&
		    entry->start > current->end) {
			vm_map_unlock_read(map);
			return (ENOMEM);
		}

		/*
		 * ignore submaps (for now) or null objects
		 */
		if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		for (; addr < cend; addr += PAGE_SIZE) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 */
			m = NULL;
			object = NULL;
retry:
			pa = 0;
			mincoreinfo = pmap_mincore(pmap, addr, &pa);
			if (mincore_mapped) {
				/*
				 * We only care about this pmap's
				 * mapping of the page, if any.
				 */
				;
			} else if (pa != 0) {
				/*
				 * The page is mapped by this process but not
				 * both accessed and modified.  It is also
				 * managed.  Acquire the object lock so that
				 * other mappings might be examined.  The page's
				 * identity may change at any point before its
				 * object lock is acquired, so re-validate if
				 * necessary.
				 */
				m = PHYS_TO_VM_PAGE(pa);
				while (object == NULL || m->object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = atomic_load_ptr(&m->object);
					if (object == NULL)
						goto retry;
					VM_OBJECT_WLOCK(object);
				}
				if (pa != pmap_extract(pmap, addr))
					goto retry;
				KASSERT(vm_page_all_valid(m),
				    ("mincore: page %p is mapped but invalid",
				    m));
			} else if (mincoreinfo == 0) {
				/*
				 * The page is not mapped by this process.  If
				 * the object implements managed pages, then
				 * determine if the page is resident so that
				 * the mappings might be examined.
				 */
				if (current->object.vm_object != object) {
					if (object != NULL)
						VM_OBJECT_WUNLOCK(object);
					object = current->object.vm_object;
					VM_OBJECT_WLOCK(object);
				}
				if (object->type == OBJT_DEFAULT ||
				    object->type == OBJT_SWAP ||
				    object->type == OBJT_VNODE) {
					pindex = OFF_TO_IDX(current->offset +
					    (addr - current->start));
					m = vm_page_lookup(object, pindex);
					if (m != NULL && vm_page_none_valid(m))
						m = NULL;
					if (m != NULL)
						mincoreinfo = MINCORE_INCORE;
				}
			}
			if (m != NULL) {
				VM_OBJECT_ASSERT_WLOCKED(m->object);

				/* Examine other mappings of the page. */
				if (m->dirty == 0 && pmap_is_modified(m))
					vm_page_dirty(m);
				if (m->dirty != 0)
					mincoreinfo |= MINCORE_MODIFIED_OTHER;

				/*
				 * The first test for PGA_REFERENCED is an
				 * optimization.  The second test is
				 * required because a concurrent pmap
				 * operation could clear the last reference
				 * and set PGA_REFERENCED before the call to
				 * pmap_is_referenced().
				 */
				if ((m->a.flags & PGA_REFERENCED) != 0 ||
				    pmap_is_referenced(m) ||
				    (m->a.flags & PGA_REFERENCED) != 0)
					mincoreinfo |= MINCORE_REFERENCED_OTHER;
			}
			if (object != NULL)
				VM_OBJECT_WUNLOCK(object);

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = atop(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done2;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done2;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = atop(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done2;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);
done2:
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	const void *addr;
	size_t len;
};
#endif
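
/*
 * Illustrative userland usage (a sketch): pinning a buffer in physical
 * memory, subject to RLIMIT_MEMLOCK and the vm_page_max_user_wired
 * limit enforced below,
 *
 *	mlock(p, len);
 */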
1050df8bae1dSRodney W. Grimes int
105104e89ffbSKonstantin Belousov sys_mlock(struct thread *td, struct mlock_args *uap)
1052df8bae1dSRodney W. Grimes {
1053995d7069SGleb Smirnoff 
1054496ab053SKonstantin Belousov 	return (kern_mlock(td->td_proc, td->td_ucred,
1055496ab053SKonstantin Belousov 	    __DECONST(uintptr_t, uap->addr), uap->len));
1056995d7069SGleb Smirnoff }
1057995d7069SGleb Smirnoff 
1058995d7069SGleb Smirnoff int
1059496ab053SKonstantin Belousov kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr0, size_t len)
1060995d7069SGleb Smirnoff {
1061bb734798SDon Lewis 	vm_offset_t addr, end, last, start;
1062bb734798SDon Lewis 	vm_size_t npages, size;
10633ac7d297SAndrey Zonov 	vm_map_t map;
10641ba5ad42SEdward Tomasz Napierala 	unsigned long nsize;
1065bb734798SDon Lewis 	int error;
1066df8bae1dSRodney W. Grimes 
1067cc426dd3SMateusz Guzik 	error = priv_check_cred(cred, PRIV_VM_MLOCK);
106847934cefSDon Lewis 	if (error)
106947934cefSDon Lewis 		return (error);
1070496ab053SKonstantin Belousov 	addr = addr0;
1071995d7069SGleb Smirnoff 	size = len;
1072bb734798SDon Lewis 	last = addr + size;
107316929939SDon Lewis 	start = trunc_page(addr);
1074bb734798SDon Lewis 	end = round_page(last);
1075bb734798SDon Lewis 	if (last < addr || end < addr)
1076df8bae1dSRodney W. Grimes 		return (EINVAL);
107716929939SDon Lewis 	npages = atop(end - start);
107854a3a114SMark Johnston 	if (npages > vm_page_max_user_wired)
107916929939SDon Lewis 		return (ENOMEM);
10803ac7d297SAndrey Zonov 	map = &proc->p_vmspace->vm_map;
108147934cefSDon Lewis 	PROC_LOCK(proc);
10823ac7d297SAndrey Zonov 	nsize = ptoa(npages + pmap_wired_count(map->pmap));
1083f6f6d240SMateusz Guzik 	if (nsize > lim_cur_proc(proc, RLIMIT_MEMLOCK)) {
108447934cefSDon Lewis 		PROC_UNLOCK(proc);
10854a40e3d4SJohn Dyson 		return (ENOMEM);
108691d5354aSJohn Baldwin 	}
108747934cefSDon Lewis 	PROC_UNLOCK(proc);
1088afcc55f3SEdward Tomasz Napierala #ifdef RACCT
10894b5c9cf6SEdward Tomasz Napierala 	if (racct_enable) {
10901ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(proc);
10911ba5ad42SEdward Tomasz Napierala 		error = racct_set(proc, RACCT_MEMLOCK, nsize);
10921ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(proc);
10931ba5ad42SEdward Tomasz Napierala 		if (error != 0)
10941ba5ad42SEdward Tomasz Napierala 			return (ENOMEM);
10954b5c9cf6SEdward Tomasz Napierala 	}
1096afcc55f3SEdward Tomasz Napierala #endif
10973ac7d297SAndrey Zonov 	error = vm_map_wire(map, start, end,
109816929939SDon Lewis 	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
1099afcc55f3SEdward Tomasz Napierala #ifdef RACCT
11004b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error != KERN_SUCCESS) {
11011ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(proc);
11021ba5ad42SEdward Tomasz Napierala 		racct_set(proc, RACCT_MEMLOCK,
11033ac7d297SAndrey Zonov 		    ptoa(pmap_wired_count(map->pmap)));
11041ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(proc);
11051ba5ad42SEdward Tomasz Napierala 	}
1106afcc55f3SEdward Tomasz Napierala #endif
1107d301b358SKonstantin Belousov 	switch (error) {
1108d301b358SKonstantin Belousov 	case KERN_SUCCESS:
1109d301b358SKonstantin Belousov 		return (0);
1110d301b358SKonstantin Belousov 	case KERN_INVALID_ARGUMENT:
1111d301b358SKonstantin Belousov 		return (EINVAL);
1112d301b358SKonstantin Belousov 	default:
1113d301b358SKonstantin Belousov 		return (ENOMEM);
1114d301b358SKonstantin Belousov 	}
1115df8bae1dSRodney W. Grimes }
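
/*
 * Editor's note: a minimal userspace sketch (not part of this file) of
 * the semantics kern_mlock() implements above: the requested range is
 * page-rounded before wiring, and exceeding RLIMIT_MEMLOCK or the
 * vm.max_user_wired limit fails with ENOMEM.
 */
#if 0	/* illustration only */
#include <sys/mman.h>

#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	static char key[1024];	/* unaligned range; whole pages are wired */

	if (mlock(key, sizeof(key)) != 0) {
		/* ENOMEM: over RLIMIT_MEMLOCK or vm.max_user_wired. */
		fprintf(stderr, "mlock: %s\n", strerror(errno));
		return (1);
	}
	/* key[] is now resident and will not be paged out. */
	(void)munlock(key, sizeof(key));
	return (0);
}
#endif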
1116df8bae1dSRodney W. Grimes 
1117d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
11184a40e3d4SJohn Dyson struct mlockall_args {
11194a40e3d4SJohn Dyson 	int	how;
11204a40e3d4SJohn Dyson };
11214a40e3d4SJohn Dyson #endif
11224a40e3d4SJohn Dyson 
11234a40e3d4SJohn Dyson int
112404e89ffbSKonstantin Belousov sys_mlockall(struct thread *td, struct mlockall_args *uap)
11254a40e3d4SJohn Dyson {
1126abd498aaSBruce M Simpson 	vm_map_t map;
1127abd498aaSBruce M Simpson 	int error;
1128abd498aaSBruce M Simpson 
1129abd498aaSBruce M Simpson 	map = &td->td_proc->p_vmspace->vm_map;
11307e19eda4SAndrey Zonov 	error = priv_check(td, PRIV_VM_MLOCK);
11317e19eda4SAndrey Zonov 	if (error)
11327e19eda4SAndrey Zonov 		return (error);
1133abd498aaSBruce M Simpson 
1134abd498aaSBruce M Simpson 	if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0))
1135abd498aaSBruce M Simpson 		return (EINVAL);
1136abd498aaSBruce M Simpson 
1137abd498aaSBruce M Simpson 	/*
1138abd498aaSBruce M Simpson 	 * If wiring all pages in the process would cause it to exceed
1139abd498aaSBruce M Simpson 	 * a hard resource limit, return ENOMEM.
1140abd498aaSBruce M Simpson 	 */
11417e19eda4SAndrey Zonov 	if (!old_mlock && uap->how & MCL_CURRENT) {
11422554f86aSMateusz Guzik 		if (map->size > lim_cur(td, RLIMIT_MEMLOCK))
1143abd498aaSBruce M Simpson 			return (ENOMEM);
114491d5354aSJohn Baldwin 	}
1145afcc55f3SEdward Tomasz Napierala #ifdef RACCT
11464b5c9cf6SEdward Tomasz Napierala 	if (racct_enable) {
11471ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
11481ba5ad42SEdward Tomasz Napierala 		error = racct_set(td->td_proc, RACCT_MEMLOCK, map->size);
11491ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
11501ba5ad42SEdward Tomasz Napierala 		if (error != 0)
11511ba5ad42SEdward Tomasz Napierala 			return (ENOMEM);
11524b5c9cf6SEdward Tomasz Napierala 	}
1153afcc55f3SEdward Tomasz Napierala #endif
1154abd498aaSBruce M Simpson 
1155abd498aaSBruce M Simpson 	if (uap->how & MCL_FUTURE) {
1156abd498aaSBruce M Simpson 		vm_map_lock(map);
1157abd498aaSBruce M Simpson 		vm_map_modflags(map, MAP_WIREFUTURE, 0);
1158abd498aaSBruce M Simpson 		vm_map_unlock(map);
1159abd498aaSBruce M Simpson 		error = 0;
1160abd498aaSBruce M Simpson 	}
1161abd498aaSBruce M Simpson 
1162abd498aaSBruce M Simpson 	if (uap->how & MCL_CURRENT) {
1163abd498aaSBruce M Simpson 		/*
1164abd498aaSBruce M Simpson 		 * P1003.1-2001 mandates that all currently mapped pages
1165abd498aaSBruce M Simpson 		 * will be memory resident and locked (wired) upon return
1166abd498aaSBruce M Simpson 	 * from mlockall(). vm_map_wire() wires the pages by
1167abd498aaSBruce M Simpson 		 * calling vm_fault_wire() for each page in the region.
1168abd498aaSBruce M Simpson 		 */
1169abd498aaSBruce M Simpson 		error = vm_map_wire(map, vm_map_min(map), vm_map_max(map),
1170abd498aaSBruce M Simpson 		    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
117154a3a114SMark Johnston 		if (error == KERN_SUCCESS)
117254a3a114SMark Johnston 			error = 0;
117354a3a114SMark Johnston 		else if (error == KERN_RESOURCE_SHORTAGE)
117454a3a114SMark Johnston 			error = ENOMEM;
117554a3a114SMark Johnston 		else
117654a3a114SMark Johnston 			error = EAGAIN;
1177abd498aaSBruce M Simpson 	}
1178afcc55f3SEdward Tomasz Napierala #ifdef RACCT
11794b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error != 0) {
11801ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
11811ba5ad42SEdward Tomasz Napierala 		racct_set(td->td_proc, RACCT_MEMLOCK,
11823ac7d297SAndrey Zonov 		    ptoa(pmap_wired_count(map->pmap)));
11831ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
11841ba5ad42SEdward Tomasz Napierala 	}
1185afcc55f3SEdward Tomasz Napierala #endif
1186abd498aaSBruce M Simpson 
1187abd498aaSBruce M Simpson 	return (error);
11884a40e3d4SJohn Dyson }
11894a40e3d4SJohn Dyson 
11904a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_
1191fa721254SAlfred Perlstein struct munlockall_args {
1192abd498aaSBruce M Simpson 	register_t dummy;
11934a40e3d4SJohn Dyson };
11944a40e3d4SJohn Dyson #endif
11954a40e3d4SJohn Dyson 
11964a40e3d4SJohn Dyson int
119704e89ffbSKonstantin Belousov sys_munlockall(struct thread *td, struct munlockall_args *uap)
11984a40e3d4SJohn Dyson {
1199abd498aaSBruce M Simpson 	vm_map_t map;
1200abd498aaSBruce M Simpson 	int error;
1201abd498aaSBruce M Simpson 
1202abd498aaSBruce M Simpson 	map = &td->td_proc->p_vmspace->vm_map;
1203acd3428bSRobert Watson 	error = priv_check(td, PRIV_VM_MUNLOCK);
1204abd498aaSBruce M Simpson 	if (error)
1205abd498aaSBruce M Simpson 		return (error);
1206abd498aaSBruce M Simpson 
1207abd498aaSBruce M Simpson 	/* Clear the MAP_WIREFUTURE flag from this vm_map. */
1208abd498aaSBruce M Simpson 	vm_map_lock(map);
1209abd498aaSBruce M Simpson 	vm_map_modflags(map, 0, MAP_WIREFUTURE);
1210abd498aaSBruce M Simpson 	vm_map_unlock(map);
1211abd498aaSBruce M Simpson 
1212abd498aaSBruce M Simpson 	/* Forcibly unwire all pages. */
1213abd498aaSBruce M Simpson 	error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map),
1214abd498aaSBruce M Simpson 	    VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK);
1215afcc55f3SEdward Tomasz Napierala #ifdef RACCT
12164b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error == KERN_SUCCESS) {
12171ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
12181ba5ad42SEdward Tomasz Napierala 		racct_set(td->td_proc, RACCT_MEMLOCK, 0);
12191ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
12201ba5ad42SEdward Tomasz Napierala 	}
1221afcc55f3SEdward Tomasz Napierala #endif
1222abd498aaSBruce M Simpson 
1223abd498aaSBruce M Simpson 	return (error);
12244a40e3d4SJohn Dyson }
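
/*
 * Editor's note: a short userspace sketch (not part of this file).
 * MCL_FUTURE sets MAP_WIREFUTURE on the vm_map so mappings created
 * later are wired at creation time; munlockall() clears that flag and
 * forcibly unwires the whole map, as implemented above.
 */
#if 0	/* illustration only */
#include <sys/mman.h>

#include <stdlib.h>

int
main(void)
{
	void *p;

	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
		return (1);
	p = malloc(1 << 20);	/* wired at creation due to MCL_FUTURE */
	if (p == NULL)
		return (1);
	free(p);
	munlockall();		/* clears MAP_WIREFUTURE and unwires */
	return (0);
}
#endif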
12254a40e3d4SJohn Dyson 
12264a40e3d4SJohn Dyson #ifndef _SYS_SYSPROTO_H_
1227df8bae1dSRodney W. Grimes struct munlock_args {
1228651bb817SAlexander Langer 	const void *addr;
1229df8bae1dSRodney W. Grimes 	size_t len;
1230df8bae1dSRodney W. Grimes };
1231d2d3e875SBruce Evans #endif
1232df8bae1dSRodney W. Grimes int
123369cdfcefSEdward Tomasz Napierala sys_munlock(struct thread *td, struct munlock_args *uap)
1234df8bae1dSRodney W. Grimes {
123569cdfcefSEdward Tomasz Napierala 
1236496ab053SKonstantin Belousov 	return (kern_munlock(td, (uintptr_t)uap->addr, uap->len));
123769cdfcefSEdward Tomasz Napierala }
123869cdfcefSEdward Tomasz Napierala 
123969cdfcefSEdward Tomasz Napierala int
1240496ab053SKonstantin Belousov kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
124169cdfcefSEdward Tomasz Napierala {
1242496ab053SKonstantin Belousov 	vm_offset_t addr, end, last, start;
1243fc2b1679SJeremie Le Hen #ifdef RACCT
1244c92b5069SJeremie Le Hen 	vm_map_t map;
1245fc2b1679SJeremie Le Hen #endif
1246df8bae1dSRodney W. Grimes 	int error;
1247df8bae1dSRodney W. Grimes 
1248acd3428bSRobert Watson 	error = priv_check(td, PRIV_VM_MUNLOCK);
124947934cefSDon Lewis 	if (error)
125047934cefSDon Lewis 		return (error);
1251496ab053SKonstantin Belousov 	addr = addr0;
1252bb734798SDon Lewis 	last = addr + size;
125316929939SDon Lewis 	start = trunc_page(addr);
1254bb734798SDon Lewis 	end = round_page(last);
1255bb734798SDon Lewis 	if (last < addr || end < addr)
1256df8bae1dSRodney W. Grimes 		return (EINVAL);
125716929939SDon Lewis 	error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, start, end,
125816929939SDon Lewis 	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
1259afcc55f3SEdward Tomasz Napierala #ifdef RACCT
12604b5c9cf6SEdward Tomasz Napierala 	if (racct_enable && error == KERN_SUCCESS) {
12611ba5ad42SEdward Tomasz Napierala 		PROC_LOCK(td->td_proc);
1262c92b5069SJeremie Le Hen 		map = &td->td_proc->p_vmspace->vm_map;
1263c92b5069SJeremie Le Hen 		racct_set(td->td_proc, RACCT_MEMLOCK,
1264c92b5069SJeremie Le Hen 		    ptoa(pmap_wired_count(map->pmap)));
12651ba5ad42SEdward Tomasz Napierala 		PROC_UNLOCK(td->td_proc);
12661ba5ad42SEdward Tomasz Napierala 	}
1267afcc55f3SEdward Tomasz Napierala #endif
1268df8bae1dSRodney W. Grimes 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
1269df8bae1dSRodney W. Grimes }
1270df8bae1dSRodney W. Grimes 
1271df8bae1dSRodney W. Grimes /*
1272c8daea13SAlexander Kabaev  * vm_mmap_vnode()
1273c8daea13SAlexander Kabaev  *
1274c8daea13SAlexander Kabaev  * Helper function for vm_mmap.  Perform sanity check specific for mmap
1275c8daea13SAlexander Kabaev  * operations on vnodes.
1276c8daea13SAlexander Kabaev  */
1277c8daea13SAlexander Kabaev int
1278c8daea13SAlexander Kabaev vm_mmap_vnode(struct thread *td, vm_size_t objsize,
1279c8daea13SAlexander Kabaev     vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
128084110e7eSKonstantin Belousov     struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
128184110e7eSKonstantin Belousov     boolean_t *writecounted)
1282c8daea13SAlexander Kabaev {
1283c8daea13SAlexander Kabaev 	struct vattr va;
1284c8daea13SAlexander Kabaev 	vm_object_t obj;
1285bd0e1bebSMark Johnston 	vm_ooffset_t foff;
12860359a12eSAttilio Rao 	struct ucred *cred;
128778022527SKonstantin Belousov 	int error, flags;
128878022527SKonstantin Belousov 	bool writex;
1289c8daea13SAlexander Kabaev 
12900359a12eSAttilio Rao 	cred = td->td_ucred;
129178022527SKonstantin Belousov 	writex = (*maxprotp & VM_PROT_WRITE) != 0 &&
129278022527SKonstantin Belousov 	    (*flagsp & MAP_SHARED) != 0;
1293a92a971bSMateusz Guzik 	if ((error = vget(vp, LK_SHARED)) != 0)
1294c8daea13SAlexander Kabaev 		return (error);
12950df42647SRobert Watson 	AUDIT_ARG_VNODE1(vp);
129664345f0bSJohn Baldwin 	foff = *foffp;
1297c8daea13SAlexander Kabaev 	flags = *flagsp;
12988516dd18SPoul-Henning Kamp 	obj = vp->v_object;
1299c8daea13SAlexander Kabaev 	if (vp->v_type == VREG) {
1300c8daea13SAlexander Kabaev 		/*
1301c8daea13SAlexander Kabaev 		 * Get the proper underlying object
1302c8daea13SAlexander Kabaev 		 */
13038516dd18SPoul-Henning Kamp 		if (obj == NULL) {
1304c8daea13SAlexander Kabaev 			error = EINVAL;
1305c8daea13SAlexander Kabaev 			goto done;
1306c8daea13SAlexander Kabaev 		}
1307e5f299ffSKonstantin Belousov 		if (obj->type == OBJT_VNODE && obj->handle != vp) {
1308c8daea13SAlexander Kabaev 			vput(vp);
1309c8daea13SAlexander Kabaev 			vp = (struct vnode *)obj->handle;
131084110e7eSKonstantin Belousov 			/*
131184110e7eSKonstantin Belousov 			 * Bypass filesystems (e.g. nullfs) obey the
131253f5f8a0SKonstantin Belousov 			 * mpsafety of the underlying fs.  Tmpfs never bypasses.
131384110e7eSKonstantin Belousov 			 */
1314a92a971bSMateusz Guzik 			error = vget(vp, LK_SHARED);
13155050aa86SKonstantin Belousov 			if (error != 0)
131684110e7eSKonstantin Belousov 				return (error);
131784110e7eSKonstantin Belousov 		}
131878022527SKonstantin Belousov 		if (writex) {
131984110e7eSKonstantin Belousov 			*writecounted = TRUE;
1320fe7bcbafSKyle Evans 			vm_pager_update_writecount(obj, 0, objsize);
132184110e7eSKonstantin Belousov 		}
1322c8daea13SAlexander Kabaev 	} else {
1323c8daea13SAlexander Kabaev 		error = EINVAL;
1324c8daea13SAlexander Kabaev 		goto done;
1325c8daea13SAlexander Kabaev 	}
13260359a12eSAttilio Rao 	if ((error = VOP_GETATTR(vp, &va, cred)))
1327c8daea13SAlexander Kabaev 		goto done;
1328c92163dcSChristian S.J. Peron #ifdef MAC
13297077c426SJohn Baldwin 	/* This relies on VM_PROT_* matching PROT_*. */
13307077c426SJohn Baldwin 	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
1331c92163dcSChristian S.J. Peron 	if (error != 0)
1332c92163dcSChristian S.J. Peron 		goto done;
1333c92163dcSChristian S.J. Peron #endif
1334c8daea13SAlexander Kabaev 	if ((flags & MAP_SHARED) != 0) {
1335c8daea13SAlexander Kabaev 		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
13367077c426SJohn Baldwin 			if (prot & VM_PROT_WRITE) {
1337c8daea13SAlexander Kabaev 				error = EPERM;
1338c8daea13SAlexander Kabaev 				goto done;
1339c8daea13SAlexander Kabaev 			}
1340c8daea13SAlexander Kabaev 			*maxprotp &= ~VM_PROT_WRITE;
1341c8daea13SAlexander Kabaev 		}
1342c8daea13SAlexander Kabaev 	}
1343c8daea13SAlexander Kabaev 	/*
1344c8daea13SAlexander Kabaev 	 * If it is a regular file without any links (i.e., it has
1345c8daea13SAlexander Kabaev 	 * been unlinked), we do not need to sync it.
1346c8daea13SAlexander Kabaev 	 * Adjust the object size to be the size of the actual file.
1347c8daea13SAlexander Kabaev 	 */
1348c8daea13SAlexander Kabaev 	objsize = round_page(va.va_size);
1349c8daea13SAlexander Kabaev 	if (va.va_nlink == 0)
1350c8daea13SAlexander Kabaev 		flags |= MAP_NOSYNC;
13513d653db0SAlan Cox 	if (obj->type == OBJT_VNODE) {
1352e5f299ffSKonstantin Belousov 		obj = vm_pager_allocate(OBJT_VNODE, vp, objsize, prot, foff,
1353e5f299ffSKonstantin Belousov 		    cred);
1354c8daea13SAlexander Kabaev 		if (obj == NULL) {
135564345f0bSJohn Baldwin 			error = ENOMEM;
1356c8daea13SAlexander Kabaev 			goto done;
1357c8daea13SAlexander Kabaev 		}
13583d653db0SAlan Cox 	} else {
13593d653db0SAlan Cox 		KASSERT(obj->type == OBJT_DEFAULT || obj->type == OBJT_SWAP,
13603d653db0SAlan Cox 		    ("wrong object type"));
1361f2410510SJeff Roberson 		vm_object_reference(obj);
13623d653db0SAlan Cox #if VM_NRESERVLEVEL > 0
1363f2410510SJeff Roberson 		if ((obj->flags & OBJ_COLORED) == 0) {
1364f2410510SJeff Roberson 			VM_OBJECT_WLOCK(obj);
13653d653db0SAlan Cox 			vm_object_color(obj, 0);
13663d653db0SAlan Cox 			VM_OBJECT_WUNLOCK(obj);
13673d653db0SAlan Cox 		}
1368f2410510SJeff Roberson #endif
1369f2410510SJeff Roberson 	}
1370c8daea13SAlexander Kabaev 	*objp = obj;
1371c8daea13SAlexander Kabaev 	*flagsp = flags;
137264345f0bSJohn Baldwin 
1373643656cfSMateusz Guzik 	VOP_MMAPPED(vp);
13741e309003SDiomidis Spinellis 
1375c8daea13SAlexander Kabaev done:
1376bafa6cfcSKonstantin Belousov 	if (error != 0 && *writecounted) {
1377bafa6cfcSKonstantin Belousov 		*writecounted = FALSE;
1378fe7bcbafSKyle Evans 		vm_pager_update_writecount(obj, objsize, 0);
1379bafa6cfcSKonstantin Belousov 	}
1380c8daea13SAlexander Kabaev 	vput(vp);
1381c8daea13SAlexander Kabaev 	return (error);
1382c8daea13SAlexander Kabaev }
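
/*
 * Editor's note: a userspace sketch (not part of this file) of the
 * vnode-mapping checks above: a writable MAP_SHARED mapping of a
 * regular file is write-counted, while the same request on a file
 * with the immutable or append flag set fails with EPERM.
 * "/tmp/demo" is an illustrative path.
 */
#if 0	/* illustration only */
#include <sys/mman.h>

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char *p;
	int fd;

	fd = open("/tmp/demo", O_RDWR | O_CREAT, 0600);
	if (fd < 0 || ftruncate(fd, 4096) != 0)
		return (1);
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return (1);
	memcpy(p, "hello", 5);	/* reaches the file via the shared object */
	munmap(p, 4096);
	close(fd);
	return (0);
}
#endif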
1383c8daea13SAlexander Kabaev 
1384c8daea13SAlexander Kabaev /*
138598df9218SJohn Baldwin  * vm_mmap_cdev()
138698df9218SJohn Baldwin  *
138798df9218SJohn Baldwin  * Helper function for vm_mmap.  Perform sanity check specific for mmap
138898df9218SJohn Baldwin  * operations on cdevs.
138998df9218SJohn Baldwin  */
139098df9218SJohn Baldwin int
13917077c426SJohn Baldwin vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
13927077c426SJohn Baldwin     vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
13937077c426SJohn Baldwin     vm_ooffset_t *foff, vm_object_t *objp)
139498df9218SJohn Baldwin {
139598df9218SJohn Baldwin 	vm_object_t obj;
13967077c426SJohn Baldwin 	int error, flags;
139798df9218SJohn Baldwin 
139898df9218SJohn Baldwin 	flags = *flagsp;
139998df9218SJohn Baldwin 
140091a35e78SKonstantin Belousov 	if (dsw->d_flags & D_MMAP_ANON) {
14017077c426SJohn Baldwin 		*objp = NULL;
14027077c426SJohn Baldwin 		*foff = 0;
140398df9218SJohn Baldwin 		*maxprotp = VM_PROT_ALL;
140498df9218SJohn Baldwin 		*flagsp |= MAP_ANON;
140598df9218SJohn Baldwin 		return (0);
140698df9218SJohn Baldwin 	}
140798df9218SJohn Baldwin 	/*
140864345f0bSJohn Baldwin 	 * cdevs do not provide private mappings of any kind.
140998df9218SJohn Baldwin 	 */
141098df9218SJohn Baldwin 	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
14117077c426SJohn Baldwin 	    (prot & VM_PROT_WRITE) != 0)
141298df9218SJohn Baldwin 		return (EACCES);
14137077c426SJohn Baldwin 	if (flags & (MAP_PRIVATE|MAP_COPY))
141498df9218SJohn Baldwin 		return (EINVAL);
141598df9218SJohn Baldwin 	/*
141698df9218SJohn Baldwin 	 * Force device mappings to be shared.
141798df9218SJohn Baldwin 	 */
141898df9218SJohn Baldwin 	flags |= MAP_SHARED;
141998df9218SJohn Baldwin #ifdef MAC_XXX
14207077c426SJohn Baldwin 	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
14217077c426SJohn Baldwin 	if (error != 0)
142298df9218SJohn Baldwin 		return (error);
142398df9218SJohn Baldwin #endif
142464345f0bSJohn Baldwin 	/*
142564345f0bSJohn Baldwin 	 * First, try d_mmap_single().  If that is not implemented
142664345f0bSJohn Baldwin 	 * (returns ENODEV), fall back to using the device pager.
142764345f0bSJohn Baldwin 	 * Note that d_mmap_single() must return a reference to the
142864345f0bSJohn Baldwin 	 * object (it is responsible for acquiring a reference
142964345f0bSJohn Baldwin 	 * on the object that it returns).
143064345f0bSJohn Baldwin 	 *
143164345f0bSJohn Baldwin 	 * XXX assumes VM_PROT_* == PROT_*
143264345f0bSJohn Baldwin 	 */
143364345f0bSJohn Baldwin 	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
143464345f0bSJohn Baldwin 	if (error != ENODEV)
143564345f0bSJohn Baldwin 		return (error);
14363364c323SKonstantin Belousov 	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
14373364c323SKonstantin Belousov 	    td->td_ucred);
143898df9218SJohn Baldwin 	if (obj == NULL)
143998df9218SJohn Baldwin 		return (EINVAL);
144098df9218SJohn Baldwin 	*objp = obj;
144198df9218SJohn Baldwin 	*flagsp = flags;
144298df9218SJohn Baldwin 	return (0);
144398df9218SJohn Baldwin }
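
/*
 * Editor's note: a hedged kernel-side sketch (not part of this file) of
 * the d_mmap_single() contract described above: return a referenced VM
 * object, or ENODEV to make vm_mmap_cdev() fall back to the device
 * pager.  "mydev_softc", "mem_obj", and "mem_size" are hypothetical
 * driver state.
 */
#if 0	/* illustration only */
static int
mydev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
    struct vm_object **object, int nprot)
{
	struct mydev_softc *sc = cdev->si_drv1;

	if (sc->mem_obj == NULL)
		return (ENODEV);	/* fall back to the device pager */
	/* Overflow-safe range check against the exported region. */
	if (*offset > sc->mem_size || size > sc->mem_size - *offset)
		return (EINVAL);
	vm_object_reference(sc->mem_obj);	/* caller receives this ref */
	*object = sc->mem_obj;
	return (0);
}
#endif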
144498df9218SJohn Baldwin 
144598df9218SJohn Baldwin /*
1446d2c60af8SMatthew Dillon  * vm_mmap()
1447d2c60af8SMatthew Dillon  *
14487077c426SJohn Baldwin  * Internal version of mmap used by exec, sys5 shared memory, and
14497077c426SJohn Baldwin  * Internal version of mmap used by exec, System V shared memory, and
14507077c426SJohn Baldwin  * character device, or NULL for MAP_ANON.
1451df8bae1dSRodney W. Grimes  */
1452df8bae1dSRodney W. Grimes int
1453b9dcd593SBruce Evans vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
1454b9dcd593SBruce Evans 	vm_prot_t maxprot, int flags,
145598df9218SJohn Baldwin 	objtype_t handle_type, void *handle,
1456b9dcd593SBruce Evans 	vm_ooffset_t foff)
1457df8bae1dSRodney W. Grimes {
14587077c426SJohn Baldwin 	vm_object_t object;
1459b40ce416SJulian Elischer 	struct thread *td = curthread;
14607077c426SJohn Baldwin 	int error;
146184110e7eSKonstantin Belousov 	boolean_t writecounted;
1462df8bae1dSRodney W. Grimes 
1463df8bae1dSRodney W. Grimes 	if (size == 0)
14647077c426SJohn Baldwin 		return (EINVAL);
1465df8bae1dSRodney W. Grimes 
1466749474f2SPeter Wemm 	size = round_page(size);
1467010ba384SMark Johnston 	object = NULL;
14687077c426SJohn Baldwin 	writecounted = FALSE;
14697077c426SJohn Baldwin 
14707077c426SJohn Baldwin 	/*
14717077c426SJohn Baldwin 	 * Lookup/allocate object.
14727077c426SJohn Baldwin 	 */
14737077c426SJohn Baldwin 	switch (handle_type) {
14747077c426SJohn Baldwin 	case OBJT_DEVICE: {
14757077c426SJohn Baldwin 		struct cdevsw *dsw;
14767077c426SJohn Baldwin 		struct cdev *cdev;
14777077c426SJohn Baldwin 		int ref;
14787077c426SJohn Baldwin 
14797077c426SJohn Baldwin 		cdev = handle;
14807077c426SJohn Baldwin 		dsw = dev_refthread(cdev, &ref);
14817077c426SJohn Baldwin 		if (dsw == NULL)
14827077c426SJohn Baldwin 			return (ENXIO);
14837077c426SJohn Baldwin 		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
14847077c426SJohn Baldwin 		    dsw, &foff, &object);
14857077c426SJohn Baldwin 		dev_relthread(cdev, ref);
14867077c426SJohn Baldwin 		break;
14877077c426SJohn Baldwin 	}
14887077c426SJohn Baldwin 	case OBJT_VNODE:
14897077c426SJohn Baldwin 		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
14907077c426SJohn Baldwin 		    handle, &foff, &object, &writecounted);
14917077c426SJohn Baldwin 		break;
14927077c426SJohn Baldwin 	case OBJT_DEFAULT:
14937077c426SJohn Baldwin 		if (handle == NULL) {
14947077c426SJohn Baldwin 			error = 0;
14957077c426SJohn Baldwin 			break;
14967077c426SJohn Baldwin 		}
14977077c426SJohn Baldwin 		/* FALLTHROUGH */
14987077c426SJohn Baldwin 	default:
14997077c426SJohn Baldwin 		error = EINVAL;
15007077c426SJohn Baldwin 		break;
15017077c426SJohn Baldwin 	}
15027077c426SJohn Baldwin 	if (error)
15037077c426SJohn Baldwin 		return (error);
15047077c426SJohn Baldwin 
15057077c426SJohn Baldwin 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
15067077c426SJohn Baldwin 	    foff, writecounted, td);
15077077c426SJohn Baldwin 	if (error != 0 && object != NULL) {
15087077c426SJohn Baldwin 		/*
15097077c426SJohn Baldwin 		 * If this mapping was accounted for in the vnode's
15107077c426SJohn Baldwin 		 * writecount, then undo that now.
15117077c426SJohn Baldwin 		 */
15127077c426SJohn Baldwin 		if (writecounted)
1513fe7bcbafSKyle Evans 			vm_pager_release_writecount(object, 0, size);
15147077c426SJohn Baldwin 		vm_object_deallocate(object);
15157077c426SJohn Baldwin 	}
15167077c426SJohn Baldwin 	return (error);
15177077c426SJohn Baldwin }
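
/*
 * Editor's note: a hedged sketch (not part of this file) of an
 * in-kernel caller using the OBJT_DEFAULT/NULL-handle path above to
 * place anonymous, zero-filled memory into a process map, in the style
 * of exec's stack/BSS setup.  The wrapper name is illustrative.
 */
#if 0	/* illustration only */
static int
map_anon_example(struct proc *p, vm_offset_t *addr, vm_size_t len)
{

	return (vm_mmap(&p->p_vmspace->vm_map, addr, round_page(len),
	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, MAP_ANON,
	    OBJT_DEFAULT, NULL, 0));
}
#endif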
15187077c426SJohn Baldwin 
15197077c426SJohn Baldwin int
152067a659d2SKonstantin Belousov kern_mmap_racct_check(struct thread *td, vm_map_t map, vm_size_t size)
15217077c426SJohn Baldwin {
152267a659d2SKonstantin Belousov 	int error;
1523df8bae1dSRodney W. Grimes 
15242554f86aSMateusz Guzik 	RACCT_PROC_LOCK(td->td_proc);
15252554f86aSMateusz Guzik 	if (map->size + size > lim_cur(td, RLIMIT_VMEM)) {
15262554f86aSMateusz Guzik 		RACCT_PROC_UNLOCK(td->td_proc);
1527070f64feSMatthew Dillon 		return (ENOMEM);
1528070f64feSMatthew Dillon 	}
1529a6492969SAlan Cox 	if (racct_set(td->td_proc, RACCT_VMEM, map->size + size)) {
15302554f86aSMateusz Guzik 		RACCT_PROC_UNLOCK(td->td_proc);
15311ba5ad42SEdward Tomasz Napierala 		return (ENOMEM);
15321ba5ad42SEdward Tomasz Napierala 	}
15337e19eda4SAndrey Zonov 	if (!old_mlock && map->flags & MAP_WIREFUTURE) {
15343ac7d297SAndrey Zonov 		if (ptoa(pmap_wired_count(map->pmap)) + size >
15352554f86aSMateusz Guzik 		    lim_cur(td, RLIMIT_MEMLOCK)) {
153667a659d2SKonstantin Belousov 			racct_set_force(td->td_proc, RACCT_VMEM, map->size);
15372554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
15387e19eda4SAndrey Zonov 			return (ENOMEM);
15397e19eda4SAndrey Zonov 		}
15407e19eda4SAndrey Zonov 		error = racct_set(td->td_proc, RACCT_MEMLOCK,
15413ac7d297SAndrey Zonov 		    ptoa(pmap_wired_count(map->pmap)) + size);
15427e19eda4SAndrey Zonov 		if (error != 0) {
154367a659d2SKonstantin Belousov 			racct_set_force(td->td_proc, RACCT_VMEM, map->size);
15442554f86aSMateusz Guzik 			RACCT_PROC_UNLOCK(td->td_proc);
15457e19eda4SAndrey Zonov 			return (error);
15467e19eda4SAndrey Zonov 		}
15477e19eda4SAndrey Zonov 	}
15482554f86aSMateusz Guzik 	RACCT_PROC_UNLOCK(td->td_proc);
154967a659d2SKonstantin Belousov 	return (0);
155067a659d2SKonstantin Belousov }
155167a659d2SKonstantin Belousov 
155267a659d2SKonstantin Belousov /*
155367a659d2SKonstantin Belousov  * Internal version of mmap that maps a specific VM object into a
155467a659d2SKonstantin Belousov  * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
155567a659d2SKonstantin Belousov  */
155667a659d2SKonstantin Belousov int
155767a659d2SKonstantin Belousov vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
155867a659d2SKonstantin Belousov     vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
155967a659d2SKonstantin Belousov     boolean_t writecounted, struct thread *td)
156067a659d2SKonstantin Belousov {
156167a659d2SKonstantin Belousov 	vm_offset_t max_addr;
156267a659d2SKonstantin Belousov 	int docow, error, findspace, rv;
156367a659d2SKonstantin Belousov 	bool curmap, fitit;
156467a659d2SKonstantin Belousov 
156567a659d2SKonstantin Belousov 	curmap = map == &td->td_proc->p_vmspace->vm_map;
156667a659d2SKonstantin Belousov 	if (curmap) {
156767a659d2SKonstantin Belousov 		error = kern_mmap_racct_check(td, map, size);
156867a659d2SKonstantin Belousov 		if (error != 0)
156967a659d2SKonstantin Belousov 			return (error);
1570a6492969SAlan Cox 	}
1571070f64feSMatthew Dillon 
1572df8bae1dSRodney W. Grimes 	/*
1573bc9ad247SDavid Greenman 	 * We currently can only deal with page aligned file offsets.
15747077c426SJohn Baldwin 	 * The mmap() system call already enforces this by subtracting
15757077c426SJohn Baldwin 	 * the page offset from the file offset, but checking here
15767077c426SJohn Baldwin 	 * catches errors in device drivers (e.g. d_single_mmap()
15777077c426SJohn Baldwin 	 * callbacks) and other internal mapping requests (such as in
15787077c426SJohn Baldwin 	 * exec).
1579bc9ad247SDavid Greenman 	 */
1580bc9ad247SDavid Greenman 	if (foff & PAGE_MASK)
1581bc9ad247SDavid Greenman 		return (EINVAL);
1582bc9ad247SDavid Greenman 
158306cb7259SDavid Greenman 	if ((flags & MAP_FIXED) == 0) {
158406cb7259SDavid Greenman 		fitit = TRUE;
158506cb7259SDavid Greenman 		*addr = round_page(*addr);
158606cb7259SDavid Greenman 	} else {
158706cb7259SDavid Greenman 		if (*addr != trunc_page(*addr))
158806cb7259SDavid Greenman 			return (EINVAL);
158906cb7259SDavid Greenman 		fitit = FALSE;
159006cb7259SDavid Greenman 	}
159184110e7eSKonstantin Belousov 
15925f55e841SDavid Greenman 	if (flags & MAP_ANON) {
15937077c426SJohn Baldwin 		if (object != NULL || foff != 0)
15947077c426SJohn Baldwin 			return (EINVAL);
1595c8daea13SAlexander Kabaev 		docow = 0;
159674ffb9afSAlan Cox 	} else if (flags & MAP_PREFAULT_READ)
159774ffb9afSAlan Cox 		docow = MAP_PREFAULT;
159874ffb9afSAlan Cox 	else
15994738fa09SAlan Cox 		docow = MAP_PREFAULT_PARTIAL;
1600df8bae1dSRodney W. Grimes 
16014f79d873SMatthew Dillon 	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
16024738fa09SAlan Cox 		docow |= MAP_COPY_ON_WRITE;
16034f79d873SMatthew Dillon 	if (flags & MAP_NOSYNC)
16044f79d873SMatthew Dillon 		docow |= MAP_DISABLE_SYNCER;
16059730a5daSPaul Saab 	if (flags & MAP_NOCORE)
16069730a5daSPaul Saab 		docow |= MAP_DISABLE_COREDUMP;
16078211bd45SKonstantin Belousov 	/* Shared mappings are inherited (shared) by child processes. */
16088211bd45SKonstantin Belousov 	if (flags & MAP_SHARED)
16098211bd45SKonstantin Belousov 		docow |= MAP_INHERIT_SHARE;
161084110e7eSKonstantin Belousov 	if (writecounted)
1611fe7bcbafSKyle Evans 		docow |= MAP_WRITECOUNT;
16124648ba0aSKonstantin Belousov 	if (flags & MAP_STACK) {
16134648ba0aSKonstantin Belousov 		if (object != NULL)
16144648ba0aSKonstantin Belousov 			return (EINVAL);
16154648ba0aSKonstantin Belousov 		docow |= MAP_STACK_GROWS_DOWN;
16164648ba0aSKonstantin Belousov 	}
161711c42bccSKonstantin Belousov 	if ((flags & MAP_EXCL) != 0)
161811c42bccSKonstantin Belousov 		docow |= MAP_CHECK_EXCL;
161919bd0d9cSKonstantin Belousov 	if ((flags & MAP_GUARD) != 0)
162019bd0d9cSKonstantin Belousov 		docow |= MAP_CREATE_GUARD;
16215850152dSJohn Dyson 
16224648ba0aSKonstantin Belousov 	if (fitit) {
16235aa60b6fSJohn Baldwin 		if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)
16245aa60b6fSJohn Baldwin 			findspace = VMFS_SUPER_SPACE;
16255aa60b6fSJohn Baldwin 		else if ((flags & MAP_ALIGNMENT_MASK) != 0)
16265aa60b6fSJohn Baldwin 			findspace = VMFS_ALIGNED_SPACE(flags >>
16275aa60b6fSJohn Baldwin 			    MAP_ALIGNMENT_SHIFT);
16282267af78SJulian Elischer 		else
16295aa60b6fSJohn Baldwin 			findspace = VMFS_OPTIMAL_SPACE;
16306a97a3f7SKonstantin Belousov 		max_addr = 0;
1631edb572a3SJohn Baldwin #ifdef MAP_32BIT
16326a97a3f7SKonstantin Belousov 		if ((flags & MAP_32BIT) != 0)
16336a97a3f7SKonstantin Belousov 			max_addr = MAP_32BIT_MAX_ADDR;
1634edb572a3SJohn Baldwin #endif
16356a97a3f7SKonstantin Belousov 		if (curmap) {
16366a97a3f7SKonstantin Belousov 			rv = vm_map_find_min(map, object, foff, addr, size,
16376a97a3f7SKonstantin Belousov 			    round_page((vm_offset_t)td->td_proc->p_vmspace->
16386a97a3f7SKonstantin Belousov 			    vm_daddr + lim_max(td, RLIMIT_DATA)), max_addr,
16396a97a3f7SKonstantin Belousov 			    findspace, prot, maxprot, docow);
16406a97a3f7SKonstantin Belousov 		} else {
16416a97a3f7SKonstantin Belousov 			rv = vm_map_find(map, object, foff, addr, size,
16426a97a3f7SKonstantin Belousov 			    max_addr, findspace, prot, maxprot, docow);
16436a97a3f7SKonstantin Belousov 		}
16444648ba0aSKonstantin Belousov 	} else {
1645b8ca4ef2SAlan Cox 		rv = vm_map_fixed(map, object, foff, *addr, size,
1646bd7e5f99SJohn Dyson 		    prot, maxprot, docow);
16474648ba0aSKonstantin Belousov 	}
1648bd7e5f99SJohn Dyson 
1649f9230ad6SAlan Cox 	if (rv == KERN_SUCCESS) {
16507fb0c17eSDavid Greenman 		/*
1651f9230ad6SAlan Cox 		 * If the process has requested that all future mappings
1652f9230ad6SAlan Cox 		 * be wired, then heed this.
1653f9230ad6SAlan Cox 		 */
165454a3a114SMark Johnston 		if ((map->flags & MAP_WIREFUTURE) != 0) {
165554a3a114SMark Johnston 			vm_map_lock(map);
165654a3a114SMark Johnston 			if ((map->flags & MAP_WIREFUTURE) != 0)
16578cd6a80dSMark Johnston 				(void)vm_map_wire_locked(map, *addr,
165854a3a114SMark Johnston 				    *addr + size, VM_MAP_WIRE_USER |
165954a3a114SMark Johnston 				    ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK :
166054a3a114SMark Johnston 				    VM_MAP_WIRE_NOHOLES));
166154a3a114SMark Johnston 			vm_map_unlock(map);
16621472f4f4SKonstantin Belousov 		}
1663df8bae1dSRodney W. Grimes 	}
16642e32165cSKonstantin Belousov 	return (vm_mmap_to_errno(rv));
16652e32165cSKonstantin Belousov }
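
/*
 * Editor's note: a hedged sketch (not part of this file) of a direct
 * vm_mmap_object() caller in the style of shm_mmap(): the caller passes
 * its own object reference, which it must drop itself on failure, just
 * as vm_mmap() above does.  The wrapper name is illustrative.
 */
#if 0	/* illustration only */
static int
map_object_example(struct thread *td, vm_object_t obj, vm_ooffset_t foff,
    vm_offset_t *addr, vm_size_t size)
{
	int error;

	vm_object_reference(obj);
	error = vm_mmap_object(&td->td_proc->p_vmspace->vm_map, addr,
	    round_page(size), VM_PROT_READ, VM_PROT_READ | VM_PROT_WRITE,
	    MAP_SHARED, obj, foff, FALSE, td);
	if (error != 0)
		vm_object_deallocate(obj);	/* undo our reference */
	return (error);
}
#endif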
16662e32165cSKonstantin Belousov 
1667f9230ad6SAlan Cox /*
1668f9230ad6SAlan Cox  * Translate a Mach VM return code to zero on success or the appropriate errno
1669f9230ad6SAlan Cox  * on failure.
1670f9230ad6SAlan Cox  */
16712e32165cSKonstantin Belousov int
16722e32165cSKonstantin Belousov vm_mmap_to_errno(int rv)
16732e32165cSKonstantin Belousov {
16742e32165cSKonstantin Belousov 
1675df8bae1dSRodney W. Grimes 	switch (rv) {
1676df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
1677df8bae1dSRodney W. Grimes 		return (0);
1678df8bae1dSRodney W. Grimes 	case KERN_INVALID_ADDRESS:
1679df8bae1dSRodney W. Grimes 	case KERN_NO_SPACE:
1680df8bae1dSRodney W. Grimes 		return (ENOMEM);
1681df8bae1dSRodney W. Grimes 	case KERN_PROTECTION_FAILURE:
1682df8bae1dSRodney W. Grimes 		return (EACCES);
1683df8bae1dSRodney W. Grimes 	default:
1684df8bae1dSRodney W. Grimes 		return (EINVAL);
1685df8bae1dSRodney W. Grimes 	}
1686df8bae1dSRodney W. Grimes }
1687