xref: /freebsd/sys/vm/vm_mmap.c (revision df8bae1de4b67ccf57f4afebd4e2bf258c38910d)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1988 University of Utah.
3df8bae1dSRodney W. Grimes  * Copyright (c) 1991, 1993
4df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
5df8bae1dSRodney W. Grimes  *
6df8bae1dSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
7df8bae1dSRodney W. Grimes  * the Systems Programming Group of the University of Utah Computer
8df8bae1dSRodney W. Grimes  * Science Department.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
19df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
20df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
21df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
22df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
23df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
24df8bae1dSRodney W. Grimes  *    without specific prior written permission.
25df8bae1dSRodney W. Grimes  *
26df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
37df8bae1dSRodney W. Grimes  *
38df8bae1dSRodney W. Grimes  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
39df8bae1dSRodney W. Grimes  *
40df8bae1dSRodney W. Grimes  *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
41df8bae1dSRodney W. Grimes  */
42df8bae1dSRodney W. Grimes 
/*
 * Mapped file (mmap) interface to VM
 */
46df8bae1dSRodney W. Grimes 
47df8bae1dSRodney W. Grimes #include <sys/param.h>
48df8bae1dSRodney W. Grimes #include <sys/systm.h>
49df8bae1dSRodney W. Grimes #include <sys/filedesc.h>
50df8bae1dSRodney W. Grimes #include <sys/resourcevar.h>
51df8bae1dSRodney W. Grimes #include <sys/proc.h>
52df8bae1dSRodney W. Grimes #include <sys/vnode.h>
53df8bae1dSRodney W. Grimes #include <sys/file.h>
54df8bae1dSRodney W. Grimes #include <sys/mman.h>
55df8bae1dSRodney W. Grimes #include <sys/conf.h>
56df8bae1dSRodney W. Grimes 
57df8bae1dSRodney W. Grimes #include <miscfs/specfs/specdev.h>
58df8bae1dSRodney W. Grimes 
59df8bae1dSRodney W. Grimes #include <vm/vm.h>
60df8bae1dSRodney W. Grimes #include <vm/vm_pager.h>
61df8bae1dSRodney W. Grimes #include <vm/vm_prot.h>
62df8bae1dSRodney W. Grimes 
#ifdef DEBUG
/*
 * Debug tracing control for this file.  mmapdebug is a bitmask of the
 * MDB_* values below; the DEBUG-only printf() calls in the system calls
 * test it before emitting a trace line.  It starts out as 0, so nothing
 * is traced until some bit is set.
 */
int mmapdebug = 0;
#define MDB_FOLLOW	0x01	/* trace entry to each call, with its arguments */
#define MDB_SYNC	0x02	/* trace msync() and the range it flushes */
#define MDB_MAPIT	0x04	/* NOTE(review): no user in the code seen here */
#endif
69df8bae1dSRodney W. Grimes 
/*
 * Argument record for sbrk(): a single int, incr.
 * The stub below never looks at it.
 */
struct sbrk_args {
	int	incr;
};

/*
 * sbrk system call.
 *
 * Placeholder: none of the arguments are examined and the call always
 * fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{
	/* Not yet implemented */
	int error = EOPNOTSUPP;

	return (error);
}
84df8bae1dSRodney W. Grimes 
/*
 * Argument record for sstk(): a single int, incr.
 * The stub below never looks at it.
 */
struct sstk_args {
	int	incr;
};

/*
 * sstk system call.
 *
 * Placeholder: none of the arguments are examined and the call always
 * fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{
	/* Not yet implemented */
	int error = EOPNOTSUPP;

	return (error);
}
99df8bae1dSRodney W. Grimes 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Argument record for ogetpagesize(); the call takes no real arguments,
 * so the record only holds a placeholder member.
 */
struct getpagesize_args {
	int	dummy;
};

/*
 * Compatibility getpagesize system call (built only with COMPAT_43 or
 * COMPAT_SUNOS).
 *
 * Stores PAGE_SIZE in *retval and returns 0.  p and uap are unused.
 */
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{
	int pagesize = PAGE_SIZE;

	*retval = pagesize;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
116df8bae1dSRodney W. Grimes 
117df8bae1dSRodney W. Grimes struct mmap_args {
118df8bae1dSRodney W. Grimes 	caddr_t	addr;
119df8bae1dSRodney W. Grimes 	size_t	len;
120df8bae1dSRodney W. Grimes 	int	prot;
121df8bae1dSRodney W. Grimes 	int	flags;
122df8bae1dSRodney W. Grimes 	int	fd;
123df8bae1dSRodney W. Grimes 	long	pad;
124df8bae1dSRodney W. Grimes 	off_t	pos;
125df8bae1dSRodney W. Grimes };
126df8bae1dSRodney W. Grimes 
#ifdef COMPAT_43
/*
 * Argument record for the old-style mmap call.  Compared with
 * struct mmap_args, len is an int, pos is a long, and there is no
 * pad member.
 */
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};

/*
 * Old-style mmap system call (built only with COMPAT_43).
 *
 * Translates the old protection and flag encodings into their current
 * equivalents, copies the remaining arguments into a struct mmap_args
 * and lets mmap() do the work.  Returns whatever mmap() returns.
 */
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	int oflags, nflags;
	/*
	 * Indexed by the low three bits of the old protection word:
	 * bit 0 selects PROT_EXEC, bit 1 PROT_WRITE and bit 2 PROT_READ.
	 */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	/*
	 * Build the new flag word.  A request that is not marked shared
	 * becomes a private mapping; every other old flag maps one-to-one
	 * and old bits not listed above are dropped.
	 */
	oflags = uap->flags;
	nflags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oflags & OMAP_ANON)
		nflags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		nflags |= MAP_COPY;
	if (oflags & OMAP_FIXED)
		nflags |= MAP_FIXED;
	if (oflags & OMAP_INHERIT)
		nflags |= MAP_INHERIT;

	/* nargs.pad is left unset; mmap() never reads it. */
	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = nflags;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;

	return (mmap(p, &nargs, retval));
}
#endif
180df8bae1dSRodney W. Grimes 
181df8bae1dSRodney W. Grimes int
182df8bae1dSRodney W. Grimes mmap(p, uap, retval)
183df8bae1dSRodney W. Grimes 	struct proc *p;
184df8bae1dSRodney W. Grimes 	register struct mmap_args *uap;
185df8bae1dSRodney W. Grimes 	int *retval;
186df8bae1dSRodney W. Grimes {
187df8bae1dSRodney W. Grimes 	register struct filedesc *fdp = p->p_fd;
188df8bae1dSRodney W. Grimes 	register struct file *fp;
189df8bae1dSRodney W. Grimes 	struct vnode *vp;
190df8bae1dSRodney W. Grimes 	vm_offset_t addr;
191df8bae1dSRodney W. Grimes 	vm_size_t size;
192df8bae1dSRodney W. Grimes 	vm_prot_t prot, maxprot;
193df8bae1dSRodney W. Grimes 	caddr_t handle;
194df8bae1dSRodney W. Grimes 	int flags, error;
195df8bae1dSRodney W. Grimes 
196df8bae1dSRodney W. Grimes 	prot = uap->prot & VM_PROT_ALL;
197df8bae1dSRodney W. Grimes 	flags = uap->flags;
198df8bae1dSRodney W. Grimes #ifdef DEBUG
199df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_FOLLOW)
200df8bae1dSRodney W. Grimes 		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
201df8bae1dSRodney W. Grimes 		       p->p_pid, uap->addr, uap->len, prot,
202df8bae1dSRodney W. Grimes 		       flags, uap->fd, (vm_offset_t)uap->pos);
203df8bae1dSRodney W. Grimes #endif
204df8bae1dSRodney W. Grimes 	/*
205df8bae1dSRodney W. Grimes 	 * Address (if FIXED) must be page aligned.
206df8bae1dSRodney W. Grimes 	 * Size is implicitly rounded to a page boundary.
207df8bae1dSRodney W. Grimes 	 */
208df8bae1dSRodney W. Grimes 	addr = (vm_offset_t) uap->addr;
209df8bae1dSRodney W. Grimes 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
210df8bae1dSRodney W. Grimes 	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
211df8bae1dSRodney W. Grimes 		return (EINVAL);
212df8bae1dSRodney W. Grimes 	size = (vm_size_t) round_page(uap->len);
213df8bae1dSRodney W. Grimes 	/*
214df8bae1dSRodney W. Grimes 	 * Check for illegal addresses.  Watch out for address wrap...
215df8bae1dSRodney W. Grimes 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
216df8bae1dSRodney W. Grimes 	 */
217df8bae1dSRodney W. Grimes 	if (flags & MAP_FIXED) {
218df8bae1dSRodney W. Grimes 		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
219df8bae1dSRodney W. Grimes 			return (EINVAL);
220df8bae1dSRodney W. Grimes 		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
221df8bae1dSRodney W. Grimes 			return (EINVAL);
222df8bae1dSRodney W. Grimes 		if (addr > addr + size)
223df8bae1dSRodney W. Grimes 			return (EINVAL);
224df8bae1dSRodney W. Grimes 	}
225df8bae1dSRodney W. Grimes 	/*
226df8bae1dSRodney W. Grimes 	 * XXX if no hint provided for a non-fixed mapping place it after
227df8bae1dSRodney W. Grimes 	 * the end of the largest possible heap.
228df8bae1dSRodney W. Grimes 	 *
229df8bae1dSRodney W. Grimes 	 * There should really be a pmap call to determine a reasonable
230df8bae1dSRodney W. Grimes 	 * location.
231df8bae1dSRodney W. Grimes 	 */
232df8bae1dSRodney W. Grimes 	if (addr == 0 && (flags & MAP_FIXED) == 0)
233df8bae1dSRodney W. Grimes 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
234df8bae1dSRodney W. Grimes 	if (flags & MAP_ANON) {
235df8bae1dSRodney W. Grimes 		/*
236df8bae1dSRodney W. Grimes 		 * Mapping blank space is trivial.
237df8bae1dSRodney W. Grimes 		 */
238df8bae1dSRodney W. Grimes 		handle = NULL;
239df8bae1dSRodney W. Grimes 		maxprot = VM_PROT_ALL;
240df8bae1dSRodney W. Grimes 	} else {
241df8bae1dSRodney W. Grimes 		/*
242df8bae1dSRodney W. Grimes 		 * Mapping file, get fp for validation.
243df8bae1dSRodney W. Grimes 		 * Obtain vnode and make sure it is of appropriate type.
244df8bae1dSRodney W. Grimes 		 */
245df8bae1dSRodney W. Grimes 		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
246df8bae1dSRodney W. Grimes 		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
247df8bae1dSRodney W. Grimes 			return (EBADF);
248df8bae1dSRodney W. Grimes 		if (fp->f_type != DTYPE_VNODE)
249df8bae1dSRodney W. Grimes 			return (EINVAL);
250df8bae1dSRodney W. Grimes 		vp = (struct vnode *)fp->f_data;
251df8bae1dSRodney W. Grimes 		if (vp->v_type != VREG && vp->v_type != VCHR)
252df8bae1dSRodney W. Grimes 			return (EINVAL);
253df8bae1dSRodney W. Grimes 		/*
254df8bae1dSRodney W. Grimes 		 * XXX hack to handle use of /dev/zero to map anon
255df8bae1dSRodney W. Grimes 		 * memory (ala SunOS).
256df8bae1dSRodney W. Grimes 		 */
257df8bae1dSRodney W. Grimes 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
258df8bae1dSRodney W. Grimes 			handle = NULL;
259df8bae1dSRodney W. Grimes 			maxprot = VM_PROT_ALL;
260df8bae1dSRodney W. Grimes 			flags |= MAP_ANON;
261df8bae1dSRodney W. Grimes 		} else {
262df8bae1dSRodney W. Grimes 			/*
263df8bae1dSRodney W. Grimes 			 * Ensure that file and memory protections are
264df8bae1dSRodney W. Grimes 			 * compatible.  Note that we only worry about
265df8bae1dSRodney W. Grimes 			 * writability if mapping is shared; in this case,
266df8bae1dSRodney W. Grimes 			 * current and max prot are dictated by the open file.
267df8bae1dSRodney W. Grimes 			 * XXX use the vnode instead?  Problem is: what
268df8bae1dSRodney W. Grimes 			 * credentials do we use for determination?
269df8bae1dSRodney W. Grimes 			 * What if proc does a setuid?
270df8bae1dSRodney W. Grimes 			 */
271df8bae1dSRodney W. Grimes 			maxprot = VM_PROT_EXECUTE;	/* ??? */
272df8bae1dSRodney W. Grimes 			if (fp->f_flag & FREAD)
273df8bae1dSRodney W. Grimes 				maxprot |= VM_PROT_READ;
274df8bae1dSRodney W. Grimes 			else if (prot & PROT_READ)
275df8bae1dSRodney W. Grimes 				return (EACCES);
276df8bae1dSRodney W. Grimes 			if (flags & MAP_SHARED) {
277df8bae1dSRodney W. Grimes 				if (fp->f_flag & FWRITE)
278df8bae1dSRodney W. Grimes 					maxprot |= VM_PROT_WRITE;
279df8bae1dSRodney W. Grimes 				else if (prot & PROT_WRITE)
280df8bae1dSRodney W. Grimes 					return (EACCES);
281df8bae1dSRodney W. Grimes 			} else
282df8bae1dSRodney W. Grimes 				maxprot |= VM_PROT_WRITE;
283df8bae1dSRodney W. Grimes 			handle = (caddr_t)vp;
284df8bae1dSRodney W. Grimes 		}
285df8bae1dSRodney W. Grimes 	}
286df8bae1dSRodney W. Grimes 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
287df8bae1dSRodney W. Grimes 	    flags, handle, (vm_offset_t)uap->pos);
288df8bae1dSRodney W. Grimes 	if (error == 0)
289df8bae1dSRodney W. Grimes 		*retval = (int)addr;
290df8bae1dSRodney W. Grimes 	return (error);
291df8bae1dSRodney W. Grimes }
292df8bae1dSRodney W. Grimes 
293df8bae1dSRodney W. Grimes struct msync_args {
294df8bae1dSRodney W. Grimes 	caddr_t	addr;
295df8bae1dSRodney W. Grimes 	int	len;
296df8bae1dSRodney W. Grimes };
297df8bae1dSRodney W. Grimes int
298df8bae1dSRodney W. Grimes msync(p, uap, retval)
299df8bae1dSRodney W. Grimes 	struct proc *p;
300df8bae1dSRodney W. Grimes 	struct msync_args *uap;
301df8bae1dSRodney W. Grimes 	int *retval;
302df8bae1dSRodney W. Grimes {
303df8bae1dSRodney W. Grimes 	vm_offset_t addr;
304df8bae1dSRodney W. Grimes 	vm_size_t size;
305df8bae1dSRodney W. Grimes 	vm_map_t map;
306df8bae1dSRodney W. Grimes 	int rv;
307df8bae1dSRodney W. Grimes 	boolean_t syncio, invalidate;
308df8bae1dSRodney W. Grimes 
309df8bae1dSRodney W. Grimes #ifdef DEBUG
310df8bae1dSRodney W. Grimes 	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
311df8bae1dSRodney W. Grimes 		printf("msync(%d): addr %x len %x\n",
312df8bae1dSRodney W. Grimes 		       p->p_pid, uap->addr, uap->len);
313df8bae1dSRodney W. Grimes #endif
314df8bae1dSRodney W. Grimes 	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
315df8bae1dSRodney W. Grimes 		return (EINVAL);
316df8bae1dSRodney W. Grimes 	map = &p->p_vmspace->vm_map;
317df8bae1dSRodney W. Grimes 	addr = (vm_offset_t)uap->addr;
318df8bae1dSRodney W. Grimes 	size = (vm_size_t)uap->len;
319df8bae1dSRodney W. Grimes 	/*
320df8bae1dSRodney W. Grimes 	 * XXX Gak!  If size is zero we are supposed to sync "all modified
321df8bae1dSRodney W. Grimes 	 * pages with the region containing addr".  Unfortunately, we
322df8bae1dSRodney W. Grimes 	 * don't really keep track of individual mmaps so we approximate
323df8bae1dSRodney W. Grimes 	 * by flushing the range of the map entry containing addr.
324df8bae1dSRodney W. Grimes 	 * This can be incorrect if the region splits or is coalesced
325df8bae1dSRodney W. Grimes 	 * with a neighbor.
326df8bae1dSRodney W. Grimes 	 */
327df8bae1dSRodney W. Grimes 	if (size == 0) {
328df8bae1dSRodney W. Grimes 		vm_map_entry_t entry;
329df8bae1dSRodney W. Grimes 
330df8bae1dSRodney W. Grimes 		vm_map_lock_read(map);
331df8bae1dSRodney W. Grimes 		rv = vm_map_lookup_entry(map, addr, &entry);
332df8bae1dSRodney W. Grimes 		vm_map_unlock_read(map);
333df8bae1dSRodney W. Grimes 		if (rv)
334df8bae1dSRodney W. Grimes 			return (EINVAL);
335df8bae1dSRodney W. Grimes 		addr = entry->start;
336df8bae1dSRodney W. Grimes 		size = entry->end - entry->start;
337df8bae1dSRodney W. Grimes 	}
338df8bae1dSRodney W. Grimes #ifdef DEBUG
339df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_SYNC)
340df8bae1dSRodney W. Grimes 		printf("msync: cleaning/flushing address range [%x-%x)\n",
341df8bae1dSRodney W. Grimes 		       addr, addr+size);
342df8bae1dSRodney W. Grimes #endif
343df8bae1dSRodney W. Grimes 	/*
344df8bae1dSRodney W. Grimes 	 * Could pass this in as a third flag argument to implement
345df8bae1dSRodney W. Grimes 	 * Sun's MS_ASYNC.
346df8bae1dSRodney W. Grimes 	 */
347df8bae1dSRodney W. Grimes 	syncio = TRUE;
348df8bae1dSRodney W. Grimes 	/*
349df8bae1dSRodney W. Grimes 	 * XXX bummer, gotta flush all cached pages to ensure
350df8bae1dSRodney W. Grimes 	 * consistency with the file system cache.  Otherwise, we could
351df8bae1dSRodney W. Grimes 	 * pass this in to implement Sun's MS_INVALIDATE.
352df8bae1dSRodney W. Grimes 	 */
353df8bae1dSRodney W. Grimes 	invalidate = TRUE;
354df8bae1dSRodney W. Grimes 	/*
355df8bae1dSRodney W. Grimes 	 * Clean the pages and interpret the return value.
356df8bae1dSRodney W. Grimes 	 */
357df8bae1dSRodney W. Grimes 	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
358df8bae1dSRodney W. Grimes 	switch (rv) {
359df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
360df8bae1dSRodney W. Grimes 		break;
361df8bae1dSRodney W. Grimes 	case KERN_INVALID_ADDRESS:
362df8bae1dSRodney W. Grimes 		return (EINVAL);	/* Sun returns ENOMEM? */
363df8bae1dSRodney W. Grimes 	case KERN_FAILURE:
364df8bae1dSRodney W. Grimes 		return (EIO);
365df8bae1dSRodney W. Grimes 	default:
366df8bae1dSRodney W. Grimes 		return (EINVAL);
367df8bae1dSRodney W. Grimes 	}
368df8bae1dSRodney W. Grimes 	return (0);
369df8bae1dSRodney W. Grimes }
370df8bae1dSRodney W. Grimes 
371df8bae1dSRodney W. Grimes struct munmap_args {
372df8bae1dSRodney W. Grimes 	caddr_t	addr;
373df8bae1dSRodney W. Grimes 	int	len;
374df8bae1dSRodney W. Grimes };
375df8bae1dSRodney W. Grimes int
376df8bae1dSRodney W. Grimes munmap(p, uap, retval)
377df8bae1dSRodney W. Grimes 	register struct proc *p;
378df8bae1dSRodney W. Grimes 	register struct munmap_args *uap;
379df8bae1dSRodney W. Grimes 	int *retval;
380df8bae1dSRodney W. Grimes {
381df8bae1dSRodney W. Grimes 	vm_offset_t addr;
382df8bae1dSRodney W. Grimes 	vm_size_t size;
383df8bae1dSRodney W. Grimes 	vm_map_t map;
384df8bae1dSRodney W. Grimes 
385df8bae1dSRodney W. Grimes #ifdef DEBUG
386df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_FOLLOW)
387df8bae1dSRodney W. Grimes 		printf("munmap(%d): addr %x len %x\n",
388df8bae1dSRodney W. Grimes 		       p->p_pid, uap->addr, uap->len);
389df8bae1dSRodney W. Grimes #endif
390df8bae1dSRodney W. Grimes 
391df8bae1dSRodney W. Grimes 	addr = (vm_offset_t) uap->addr;
392df8bae1dSRodney W. Grimes 	if ((addr & PAGE_MASK) || uap->len < 0)
393df8bae1dSRodney W. Grimes 		return(EINVAL);
394df8bae1dSRodney W. Grimes 	size = (vm_size_t) round_page(uap->len);
395df8bae1dSRodney W. Grimes 	if (size == 0)
396df8bae1dSRodney W. Grimes 		return(0);
397df8bae1dSRodney W. Grimes 	/*
398df8bae1dSRodney W. Grimes 	 * Check for illegal addresses.  Watch out for address wrap...
399df8bae1dSRodney W. Grimes 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
400df8bae1dSRodney W. Grimes 	 */
401df8bae1dSRodney W. Grimes 	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
402df8bae1dSRodney W. Grimes 		return (EINVAL);
403df8bae1dSRodney W. Grimes 	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
404df8bae1dSRodney W. Grimes 		return (EINVAL);
405df8bae1dSRodney W. Grimes 	if (addr > addr + size)
406df8bae1dSRodney W. Grimes 		return (EINVAL);
407df8bae1dSRodney W. Grimes 	map = &p->p_vmspace->vm_map;
408df8bae1dSRodney W. Grimes 	/*
409df8bae1dSRodney W. Grimes 	 * Make sure entire range is allocated.
410df8bae1dSRodney W. Grimes 	 */
411df8bae1dSRodney W. Grimes 	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
412df8bae1dSRodney W. Grimes 		return(EINVAL);
413df8bae1dSRodney W. Grimes 	/* returns nothing but KERN_SUCCESS anyway */
414df8bae1dSRodney W. Grimes 	(void) vm_map_remove(map, addr, addr+size);
415df8bae1dSRodney W. Grimes 	return(0);
416df8bae1dSRodney W. Grimes }
417df8bae1dSRodney W. Grimes 
418df8bae1dSRodney W. Grimes void
419df8bae1dSRodney W. Grimes munmapfd(fd)
420df8bae1dSRodney W. Grimes 	int fd;
421df8bae1dSRodney W. Grimes {
422df8bae1dSRodney W. Grimes #ifdef DEBUG
423df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_FOLLOW)
424df8bae1dSRodney W. Grimes 		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
425df8bae1dSRodney W. Grimes #endif
426df8bae1dSRodney W. Grimes 
427df8bae1dSRodney W. Grimes 	/*
428df8bae1dSRodney W. Grimes 	 * XXX should vm_deallocate any regions mapped to this file
429df8bae1dSRodney W. Grimes 	 */
430df8bae1dSRodney W. Grimes 	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
431df8bae1dSRodney W. Grimes }
432df8bae1dSRodney W. Grimes 
433df8bae1dSRodney W. Grimes struct mprotect_args {
434df8bae1dSRodney W. Grimes 	caddr_t	addr;
435df8bae1dSRodney W. Grimes 	int	len;
436df8bae1dSRodney W. Grimes 	int	prot;
437df8bae1dSRodney W. Grimes };
438df8bae1dSRodney W. Grimes int
439df8bae1dSRodney W. Grimes mprotect(p, uap, retval)
440df8bae1dSRodney W. Grimes 	struct proc *p;
441df8bae1dSRodney W. Grimes 	struct mprotect_args *uap;
442df8bae1dSRodney W. Grimes 	int *retval;
443df8bae1dSRodney W. Grimes {
444df8bae1dSRodney W. Grimes 	vm_offset_t addr;
445df8bae1dSRodney W. Grimes 	vm_size_t size;
446df8bae1dSRodney W. Grimes 	register vm_prot_t prot;
447df8bae1dSRodney W. Grimes 
448df8bae1dSRodney W. Grimes #ifdef DEBUG
449df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_FOLLOW)
450df8bae1dSRodney W. Grimes 		printf("mprotect(%d): addr %x len %x prot %d\n",
451df8bae1dSRodney W. Grimes 		       p->p_pid, uap->addr, uap->len, uap->prot);
452df8bae1dSRodney W. Grimes #endif
453df8bae1dSRodney W. Grimes 
454df8bae1dSRodney W. Grimes 	addr = (vm_offset_t)uap->addr;
455df8bae1dSRodney W. Grimes 	if ((addr & PAGE_MASK) || uap->len < 0)
456df8bae1dSRodney W. Grimes 		return(EINVAL);
457df8bae1dSRodney W. Grimes 	size = (vm_size_t)uap->len;
458df8bae1dSRodney W. Grimes 	prot = uap->prot & VM_PROT_ALL;
459df8bae1dSRodney W. Grimes 
460df8bae1dSRodney W. Grimes 	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
461df8bae1dSRodney W. Grimes 	    FALSE)) {
462df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
463df8bae1dSRodney W. Grimes 		return (0);
464df8bae1dSRodney W. Grimes 	case KERN_PROTECTION_FAILURE:
465df8bae1dSRodney W. Grimes 		return (EACCES);
466df8bae1dSRodney W. Grimes 	}
467df8bae1dSRodney W. Grimes 	return (EINVAL);
468df8bae1dSRodney W. Grimes }
469df8bae1dSRodney W. Grimes 
470df8bae1dSRodney W. Grimes struct madvise_args {
471df8bae1dSRodney W. Grimes 	caddr_t	addr;
472df8bae1dSRodney W. Grimes 	int	len;
473df8bae1dSRodney W. Grimes 	int	behav;
474df8bae1dSRodney W. Grimes };
475df8bae1dSRodney W. Grimes /* ARGSUSED */
476df8bae1dSRodney W. Grimes int
477df8bae1dSRodney W. Grimes madvise(p, uap, retval)
478df8bae1dSRodney W. Grimes 	struct proc *p;
479df8bae1dSRodney W. Grimes 	struct madvise_args *uap;
480df8bae1dSRodney W. Grimes 	int *retval;
481df8bae1dSRodney W. Grimes {
482df8bae1dSRodney W. Grimes 
483df8bae1dSRodney W. Grimes 	/* Not yet implemented */
484df8bae1dSRodney W. Grimes 	return (EOPNOTSUPP);
485df8bae1dSRodney W. Grimes }
486df8bae1dSRodney W. Grimes 
487df8bae1dSRodney W. Grimes struct mincore_args {
488df8bae1dSRodney W. Grimes 	caddr_t	addr;
489df8bae1dSRodney W. Grimes 	int	len;
490df8bae1dSRodney W. Grimes 	char	*vec;
491df8bae1dSRodney W. Grimes };
492df8bae1dSRodney W. Grimes /* ARGSUSED */
493df8bae1dSRodney W. Grimes int
494df8bae1dSRodney W. Grimes mincore(p, uap, retval)
495df8bae1dSRodney W. Grimes 	struct proc *p;
496df8bae1dSRodney W. Grimes 	struct mincore_args *uap;
497df8bae1dSRodney W. Grimes 	int *retval;
498df8bae1dSRodney W. Grimes {
499df8bae1dSRodney W. Grimes 
500df8bae1dSRodney W. Grimes 	/* Not yet implemented */
501df8bae1dSRodney W. Grimes 	return (EOPNOTSUPP);
502df8bae1dSRodney W. Grimes }
503df8bae1dSRodney W. Grimes 
504df8bae1dSRodney W. Grimes struct mlock_args {
505df8bae1dSRodney W. Grimes 	caddr_t	addr;
506df8bae1dSRodney W. Grimes 	size_t	len;
507df8bae1dSRodney W. Grimes };
508df8bae1dSRodney W. Grimes int
509df8bae1dSRodney W. Grimes mlock(p, uap, retval)
510df8bae1dSRodney W. Grimes 	struct proc *p;
511df8bae1dSRodney W. Grimes 	struct mlock_args *uap;
512df8bae1dSRodney W. Grimes 	int *retval;
513df8bae1dSRodney W. Grimes {
514df8bae1dSRodney W. Grimes 	vm_offset_t addr;
515df8bae1dSRodney W. Grimes 	vm_size_t size;
516df8bae1dSRodney W. Grimes 	int error;
517df8bae1dSRodney W. Grimes 	extern int vm_page_max_wired;
518df8bae1dSRodney W. Grimes 
519df8bae1dSRodney W. Grimes #ifdef DEBUG
520df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_FOLLOW)
521df8bae1dSRodney W. Grimes 		printf("mlock(%d): addr %x len %x\n",
522df8bae1dSRodney W. Grimes 		       p->p_pid, uap->addr, uap->len);
523df8bae1dSRodney W. Grimes #endif
524df8bae1dSRodney W. Grimes 	addr = (vm_offset_t)uap->addr;
525df8bae1dSRodney W. Grimes 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
526df8bae1dSRodney W. Grimes 		return (EINVAL);
527df8bae1dSRodney W. Grimes 	size = round_page((vm_size_t)uap->len);
528df8bae1dSRodney W. Grimes 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
529df8bae1dSRodney W. Grimes 		return (EAGAIN);
530df8bae1dSRodney W. Grimes #ifdef pmap_wired_count
531df8bae1dSRodney W. Grimes 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
532df8bae1dSRodney W. Grimes 	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
533df8bae1dSRodney W. Grimes 		return (EAGAIN);
534df8bae1dSRodney W. Grimes #else
535df8bae1dSRodney W. Grimes 	if (error = suser(p->p_ucred, &p->p_acflag))
536df8bae1dSRodney W. Grimes 		return (error);
537df8bae1dSRodney W. Grimes #endif
538df8bae1dSRodney W. Grimes 
539df8bae1dSRodney W. Grimes 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
540df8bae1dSRodney W. Grimes 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
541df8bae1dSRodney W. Grimes }
542df8bae1dSRodney W. Grimes 
543df8bae1dSRodney W. Grimes struct munlock_args {
544df8bae1dSRodney W. Grimes 	caddr_t	addr;
545df8bae1dSRodney W. Grimes 	size_t	len;
546df8bae1dSRodney W. Grimes };
547df8bae1dSRodney W. Grimes int
548df8bae1dSRodney W. Grimes munlock(p, uap, retval)
549df8bae1dSRodney W. Grimes 	struct proc *p;
550df8bae1dSRodney W. Grimes 	struct munlock_args *uap;
551df8bae1dSRodney W. Grimes 	int *retval;
552df8bae1dSRodney W. Grimes {
553df8bae1dSRodney W. Grimes 	vm_offset_t addr;
554df8bae1dSRodney W. Grimes 	vm_size_t size;
555df8bae1dSRodney W. Grimes 	int error;
556df8bae1dSRodney W. Grimes 
557df8bae1dSRodney W. Grimes #ifdef DEBUG
558df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_FOLLOW)
559df8bae1dSRodney W. Grimes 		printf("munlock(%d): addr %x len %x\n",
560df8bae1dSRodney W. Grimes 		       p->p_pid, uap->addr, uap->len);
561df8bae1dSRodney W. Grimes #endif
562df8bae1dSRodney W. Grimes 	addr = (vm_offset_t)uap->addr;
563df8bae1dSRodney W. Grimes 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
564df8bae1dSRodney W. Grimes 		return (EINVAL);
565df8bae1dSRodney W. Grimes #ifndef pmap_wired_count
566df8bae1dSRodney W. Grimes 	if (error = suser(p->p_ucred, &p->p_acflag))
567df8bae1dSRodney W. Grimes 		return (error);
568df8bae1dSRodney W. Grimes #endif
569df8bae1dSRodney W. Grimes 	size = round_page((vm_size_t)uap->len);
570df8bae1dSRodney W. Grimes 
571df8bae1dSRodney W. Grimes 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
572df8bae1dSRodney W. Grimes 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
573df8bae1dSRodney W. Grimes }
574df8bae1dSRodney W. Grimes 
575df8bae1dSRodney W. Grimes /*
576df8bae1dSRodney W. Grimes  * Internal version of mmap.
577df8bae1dSRodney W. Grimes  * Currently used by mmap, exec, and sys5 shared memory.
578df8bae1dSRodney W. Grimes  * Handle is either a vnode pointer or NULL for MAP_ANON.
579df8bae1dSRodney W. Grimes  */
580df8bae1dSRodney W. Grimes int
581df8bae1dSRodney W. Grimes vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
582df8bae1dSRodney W. Grimes 	register vm_map_t map;
583df8bae1dSRodney W. Grimes 	register vm_offset_t *addr;
584df8bae1dSRodney W. Grimes 	register vm_size_t size;
585df8bae1dSRodney W. Grimes 	vm_prot_t prot, maxprot;
586df8bae1dSRodney W. Grimes 	register int flags;
587df8bae1dSRodney W. Grimes 	caddr_t handle;		/* XXX should be vp */
588df8bae1dSRodney W. Grimes 	vm_offset_t foff;
589df8bae1dSRodney W. Grimes {
590df8bae1dSRodney W. Grimes 	register vm_pager_t pager;
591df8bae1dSRodney W. Grimes 	boolean_t fitit;
592df8bae1dSRodney W. Grimes 	vm_object_t object;
593df8bae1dSRodney W. Grimes 	struct vnode *vp = NULL;
594df8bae1dSRodney W. Grimes 	int type;
595df8bae1dSRodney W. Grimes 	int rv = KERN_SUCCESS;
596df8bae1dSRodney W. Grimes 
597df8bae1dSRodney W. Grimes 	if (size == 0)
598df8bae1dSRodney W. Grimes 		return (0);
599df8bae1dSRodney W. Grimes 
600df8bae1dSRodney W. Grimes 	if ((flags & MAP_FIXED) == 0) {
601df8bae1dSRodney W. Grimes 		fitit = TRUE;
602df8bae1dSRodney W. Grimes 		*addr = round_page(*addr);
603df8bae1dSRodney W. Grimes 	} else {
604df8bae1dSRodney W. Grimes 		fitit = FALSE;
605df8bae1dSRodney W. Grimes 		(void)vm_deallocate(map, *addr, size);
606df8bae1dSRodney W. Grimes 	}
607df8bae1dSRodney W. Grimes 
608df8bae1dSRodney W. Grimes 	/*
609df8bae1dSRodney W. Grimes 	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
610df8bae1dSRodney W. Grimes 	 * gain a reference to ensure continued existance of the object.
611df8bae1dSRodney W. Grimes 	 * (XXX the exception is to appease the pageout daemon)
612df8bae1dSRodney W. Grimes 	 */
613df8bae1dSRodney W. Grimes 	if (flags & MAP_ANON)
614df8bae1dSRodney W. Grimes 		type = PG_DFLT;
615df8bae1dSRodney W. Grimes 	else {
616df8bae1dSRodney W. Grimes 		vp = (struct vnode *)handle;
617df8bae1dSRodney W. Grimes 		if (vp->v_type == VCHR) {
618df8bae1dSRodney W. Grimes 			type = PG_DEVICE;
619df8bae1dSRodney W. Grimes 			handle = (caddr_t)vp->v_rdev;
620df8bae1dSRodney W. Grimes 		} else
621df8bae1dSRodney W. Grimes 			type = PG_VNODE;
622df8bae1dSRodney W. Grimes 	}
623df8bae1dSRodney W. Grimes 	pager = vm_pager_allocate(type, handle, size, prot, foff);
624df8bae1dSRodney W. Grimes 	if (pager == NULL)
625df8bae1dSRodney W. Grimes 		return (type == PG_DEVICE ? EINVAL : ENOMEM);
626df8bae1dSRodney W. Grimes 	/*
627df8bae1dSRodney W. Grimes 	 * Find object and release extra reference gained by lookup
628df8bae1dSRodney W. Grimes 	 */
629df8bae1dSRodney W. Grimes 	object = vm_object_lookup(pager);
630df8bae1dSRodney W. Grimes 	vm_object_deallocate(object);
631df8bae1dSRodney W. Grimes 
632df8bae1dSRodney W. Grimes 	/*
633df8bae1dSRodney W. Grimes 	 * Anonymous memory.
634df8bae1dSRodney W. Grimes 	 */
635df8bae1dSRodney W. Grimes 	if (flags & MAP_ANON) {
636df8bae1dSRodney W. Grimes 		rv = vm_allocate_with_pager(map, addr, size, fitit,
637df8bae1dSRodney W. Grimes 					    pager, foff, TRUE);
638df8bae1dSRodney W. Grimes 		if (rv != KERN_SUCCESS) {
639df8bae1dSRodney W. Grimes 			if (handle == NULL)
640df8bae1dSRodney W. Grimes 				vm_pager_deallocate(pager);
641df8bae1dSRodney W. Grimes 			else
642df8bae1dSRodney W. Grimes 				vm_object_deallocate(object);
643df8bae1dSRodney W. Grimes 			goto out;
644df8bae1dSRodney W. Grimes 		}
645df8bae1dSRodney W. Grimes 		/*
646df8bae1dSRodney W. Grimes 		 * Don't cache anonymous objects.
647df8bae1dSRodney W. Grimes 		 * Loses the reference gained by vm_pager_allocate.
648df8bae1dSRodney W. Grimes 		 * Note that object will be NULL when handle == NULL,
649df8bae1dSRodney W. Grimes 		 * this is ok since vm_allocate_with_pager has made
650df8bae1dSRodney W. Grimes 		 * sure that these objects are uncached.
651df8bae1dSRodney W. Grimes 		 */
652df8bae1dSRodney W. Grimes 		(void) pager_cache(object, FALSE);
653df8bae1dSRodney W. Grimes #ifdef DEBUG
654df8bae1dSRodney W. Grimes 		if (mmapdebug & MDB_MAPIT)
655df8bae1dSRodney W. Grimes 			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
656df8bae1dSRodney W. Grimes 			       curproc->p_pid, *addr, size, pager);
657df8bae1dSRodney W. Grimes #endif
658df8bae1dSRodney W. Grimes 	}
659df8bae1dSRodney W. Grimes 	/*
660df8bae1dSRodney W. Grimes 	 * Must be a mapped file.
661df8bae1dSRodney W. Grimes 	 * Distinguish between character special and regular files.
662df8bae1dSRodney W. Grimes 	 */
663df8bae1dSRodney W. Grimes 	else if (vp->v_type == VCHR) {
664df8bae1dSRodney W. Grimes 		rv = vm_allocate_with_pager(map, addr, size, fitit,
665df8bae1dSRodney W. Grimes 					    pager, foff, FALSE);
666df8bae1dSRodney W. Grimes 		/*
667df8bae1dSRodney W. Grimes 		 * Uncache the object and lose the reference gained
668df8bae1dSRodney W. Grimes 		 * by vm_pager_allocate().  If the call to
669df8bae1dSRodney W. Grimes 		 * vm_allocate_with_pager() was sucessful, then we
670df8bae1dSRodney W. Grimes 		 * gained an additional reference ensuring the object
671df8bae1dSRodney W. Grimes 		 * will continue to exist.  If the call failed then
672df8bae1dSRodney W. Grimes 		 * the deallocate call below will terminate the
673df8bae1dSRodney W. Grimes 		 * object which is fine.
674df8bae1dSRodney W. Grimes 		 */
675df8bae1dSRodney W. Grimes 		(void) pager_cache(object, FALSE);
676df8bae1dSRodney W. Grimes 		if (rv != KERN_SUCCESS)
677df8bae1dSRodney W. Grimes 			goto out;
678df8bae1dSRodney W. Grimes 	}
679df8bae1dSRodney W. Grimes 	/*
680df8bae1dSRodney W. Grimes 	 * A regular file
681df8bae1dSRodney W. Grimes 	 */
682df8bae1dSRodney W. Grimes 	else {
683df8bae1dSRodney W. Grimes #ifdef DEBUG
684df8bae1dSRodney W. Grimes 		if (object == NULL)
685df8bae1dSRodney W. Grimes 			printf("vm_mmap: no object: vp %x, pager %x\n",
686df8bae1dSRodney W. Grimes 			       vp, pager);
687df8bae1dSRodney W. Grimes #endif
688df8bae1dSRodney W. Grimes 		/*
689df8bae1dSRodney W. Grimes 		 * Map it directly.
690df8bae1dSRodney W. Grimes 		 * Allows modifications to go out to the vnode.
691df8bae1dSRodney W. Grimes 		 */
692df8bae1dSRodney W. Grimes 		if (flags & MAP_SHARED) {
693df8bae1dSRodney W. Grimes 			rv = vm_allocate_with_pager(map, addr, size,
694df8bae1dSRodney W. Grimes 						    fitit, pager,
695df8bae1dSRodney W. Grimes 						    foff, FALSE);
696df8bae1dSRodney W. Grimes 			if (rv != KERN_SUCCESS) {
697df8bae1dSRodney W. Grimes 				vm_object_deallocate(object);
698df8bae1dSRodney W. Grimes 				goto out;
699df8bae1dSRodney W. Grimes 			}
700df8bae1dSRodney W. Grimes 			/*
701df8bae1dSRodney W. Grimes 			 * Don't cache the object.  This is the easiest way
702df8bae1dSRodney W. Grimes 			 * of ensuring that data gets back to the filesystem
703df8bae1dSRodney W. Grimes 			 * because vnode_pager_deallocate() will fsync the
704df8bae1dSRodney W. Grimes 			 * vnode.  pager_cache() will lose the extra ref.
705df8bae1dSRodney W. Grimes 			 */
706df8bae1dSRodney W. Grimes 			if (prot & VM_PROT_WRITE)
707df8bae1dSRodney W. Grimes 				pager_cache(object, FALSE);
708df8bae1dSRodney W. Grimes 			else
709df8bae1dSRodney W. Grimes 				vm_object_deallocate(object);
710df8bae1dSRodney W. Grimes 		}
711df8bae1dSRodney W. Grimes 		/*
712df8bae1dSRodney W. Grimes 		 * Copy-on-write of file.  Two flavors.
713df8bae1dSRodney W. Grimes 		 * MAP_COPY is true COW, you essentially get a snapshot of
714df8bae1dSRodney W. Grimes 		 * the region at the time of mapping.  MAP_PRIVATE means only
715df8bae1dSRodney W. Grimes 		 * that your changes are not reflected back to the object.
716df8bae1dSRodney W. Grimes 		 * Changes made by others will be seen.
717df8bae1dSRodney W. Grimes 		 */
718df8bae1dSRodney W. Grimes 		else {
719df8bae1dSRodney W. Grimes 			vm_map_t tmap;
720df8bae1dSRodney W. Grimes 			vm_offset_t off;
721df8bae1dSRodney W. Grimes 
722df8bae1dSRodney W. Grimes 			/* locate and allocate the target address space */
723df8bae1dSRodney W. Grimes 			rv = vm_map_find(map, NULL, (vm_offset_t)0,
724df8bae1dSRodney W. Grimes 					 addr, size, fitit);
725df8bae1dSRodney W. Grimes 			if (rv != KERN_SUCCESS) {
726df8bae1dSRodney W. Grimes 				vm_object_deallocate(object);
727df8bae1dSRodney W. Grimes 				goto out;
728df8bae1dSRodney W. Grimes 			}
729df8bae1dSRodney W. Grimes 			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
730df8bae1dSRodney W. Grimes 					     VM_MIN_ADDRESS+size, TRUE);
731df8bae1dSRodney W. Grimes 			off = VM_MIN_ADDRESS;
732df8bae1dSRodney W. Grimes 			rv = vm_allocate_with_pager(tmap, &off, size,
733df8bae1dSRodney W. Grimes 						    TRUE, pager,
734df8bae1dSRodney W. Grimes 						    foff, FALSE);
735df8bae1dSRodney W. Grimes 			if (rv != KERN_SUCCESS) {
736df8bae1dSRodney W. Grimes 				vm_object_deallocate(object);
737df8bae1dSRodney W. Grimes 				vm_map_deallocate(tmap);
738df8bae1dSRodney W. Grimes 				goto out;
739df8bae1dSRodney W. Grimes 			}
740df8bae1dSRodney W. Grimes 			/*
741df8bae1dSRodney W. Grimes 			 * (XXX)
742df8bae1dSRodney W. Grimes 			 * MAP_PRIVATE implies that we see changes made by
743df8bae1dSRodney W. Grimes 			 * others.  To ensure that we need to guarentee that
744df8bae1dSRodney W. Grimes 			 * no copy object is created (otherwise original
745df8bae1dSRodney W. Grimes 			 * pages would be pushed to the copy object and we
746df8bae1dSRodney W. Grimes 			 * would never see changes made by others).  We
747df8bae1dSRodney W. Grimes 			 * totally sleeze it right now by marking the object
748df8bae1dSRodney W. Grimes 			 * internal temporarily.
749df8bae1dSRodney W. Grimes 			 */
750df8bae1dSRodney W. Grimes 			if ((flags & MAP_COPY) == 0)
751df8bae1dSRodney W. Grimes 				object->flags |= OBJ_INTERNAL;
752df8bae1dSRodney W. Grimes 			rv = vm_map_copy(map, tmap, *addr, size, off,
753df8bae1dSRodney W. Grimes 					 FALSE, FALSE);
754df8bae1dSRodney W. Grimes 			object->flags &= ~OBJ_INTERNAL;
755df8bae1dSRodney W. Grimes 			/*
756df8bae1dSRodney W. Grimes 			 * (XXX)
757df8bae1dSRodney W. Grimes 			 * My oh my, this only gets worse...
758df8bae1dSRodney W. Grimes 			 * Force creation of a shadow object so that
759df8bae1dSRodney W. Grimes 			 * vm_map_fork will do the right thing.
760df8bae1dSRodney W. Grimes 			 */
761df8bae1dSRodney W. Grimes 			if ((flags & MAP_COPY) == 0) {
762df8bae1dSRodney W. Grimes 				vm_map_t tmap;
763df8bae1dSRodney W. Grimes 				vm_map_entry_t tentry;
764df8bae1dSRodney W. Grimes 				vm_object_t tobject;
765df8bae1dSRodney W. Grimes 				vm_offset_t toffset;
766df8bae1dSRodney W. Grimes 				vm_prot_t tprot;
767df8bae1dSRodney W. Grimes 				boolean_t twired, tsu;
768df8bae1dSRodney W. Grimes 
769df8bae1dSRodney W. Grimes 				tmap = map;
770df8bae1dSRodney W. Grimes 				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
771df8bae1dSRodney W. Grimes 					      &tentry, &tobject, &toffset,
772df8bae1dSRodney W. Grimes 					      &tprot, &twired, &tsu);
773df8bae1dSRodney W. Grimes 				vm_map_lookup_done(tmap, tentry);
774df8bae1dSRodney W. Grimes 			}
775df8bae1dSRodney W. Grimes 			/*
776df8bae1dSRodney W. Grimes 			 * (XXX)
777df8bae1dSRodney W. Grimes 			 * Map copy code cannot detect sharing unless a
778df8bae1dSRodney W. Grimes 			 * sharing map is involved.  So we cheat and write
779df8bae1dSRodney W. Grimes 			 * protect everything ourselves.
780df8bae1dSRodney W. Grimes 			 */
781df8bae1dSRodney W. Grimes 			vm_object_pmap_copy(object, foff, foff + size);
782df8bae1dSRodney W. Grimes 			vm_object_deallocate(object);
783df8bae1dSRodney W. Grimes 			vm_map_deallocate(tmap);
784df8bae1dSRodney W. Grimes 			if (rv != KERN_SUCCESS)
785df8bae1dSRodney W. Grimes 				goto out;
786df8bae1dSRodney W. Grimes 		}
787df8bae1dSRodney W. Grimes #ifdef DEBUG
788df8bae1dSRodney W. Grimes 		if (mmapdebug & MDB_MAPIT)
789df8bae1dSRodney W. Grimes 			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
790df8bae1dSRodney W. Grimes 			       curproc->p_pid, *addr, size, pager);
791df8bae1dSRodney W. Grimes #endif
792df8bae1dSRodney W. Grimes 	}
793df8bae1dSRodney W. Grimes 	/*
794df8bae1dSRodney W. Grimes 	 * Correct protection (default is VM_PROT_ALL).
795df8bae1dSRodney W. Grimes 	 * If maxprot is different than prot, we must set both explicitly.
796df8bae1dSRodney W. Grimes 	 */
797df8bae1dSRodney W. Grimes 	rv = KERN_SUCCESS;
798df8bae1dSRodney W. Grimes 	if (maxprot != VM_PROT_ALL)
799df8bae1dSRodney W. Grimes 		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
800df8bae1dSRodney W. Grimes 	if (rv == KERN_SUCCESS && prot != maxprot)
801df8bae1dSRodney W. Grimes 		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
802df8bae1dSRodney W. Grimes 	if (rv != KERN_SUCCESS) {
803df8bae1dSRodney W. Grimes 		(void) vm_deallocate(map, *addr, size);
804df8bae1dSRodney W. Grimes 		goto out;
805df8bae1dSRodney W. Grimes 	}
806df8bae1dSRodney W. Grimes 	/*
807df8bae1dSRodney W. Grimes 	 * Shared memory is also shared with children.
808df8bae1dSRodney W. Grimes 	 */
809df8bae1dSRodney W. Grimes 	if (flags & MAP_SHARED) {
810df8bae1dSRodney W. Grimes 		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
811df8bae1dSRodney W. Grimes 		if (rv != KERN_SUCCESS) {
812df8bae1dSRodney W. Grimes 			(void) vm_deallocate(map, *addr, size);
813df8bae1dSRodney W. Grimes 			goto out;
814df8bae1dSRodney W. Grimes 		}
815df8bae1dSRodney W. Grimes 	}
816df8bae1dSRodney W. Grimes out:
817df8bae1dSRodney W. Grimes #ifdef DEBUG
818df8bae1dSRodney W. Grimes 	if (mmapdebug & MDB_MAPIT)
819df8bae1dSRodney W. Grimes 		printf("vm_mmap: rv %d\n", rv);
820df8bae1dSRodney W. Grimes #endif
821df8bae1dSRodney W. Grimes 	switch (rv) {
822df8bae1dSRodney W. Grimes 	case KERN_SUCCESS:
823df8bae1dSRodney W. Grimes 		return (0);
824df8bae1dSRodney W. Grimes 	case KERN_INVALID_ADDRESS:
825df8bae1dSRodney W. Grimes 	case KERN_NO_SPACE:
826df8bae1dSRodney W. Grimes 		return (ENOMEM);
827df8bae1dSRodney W. Grimes 	case KERN_PROTECTION_FAILURE:
828df8bae1dSRodney W. Grimes 		return (EACCES);
829df8bae1dSRodney W. Grimes 	default:
830df8bae1dSRodney W. Grimes 		return (EINVAL);
831df8bae1dSRodney W. Grimes 	}
832df8bae1dSRodney W. Grimes }
833