xref: /freebsd/sys/vm/vm_mmap.c (revision a316b26e50bbed7cf655fbba726ab87d8ab7599d)
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.7 1994/10/09 01:52:11 phk Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
int mmapdebug = 0;

#define MDB_FOLLOW	0x01
#define MDB_SYNC	0x02
#define MDB_MAPIT	0x04
#endif

void pmap_object_init_pt();

struct sbrk_args {
	int incr;
};

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct sstk_args {
	int incr;
};

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int dummy;
};

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 || COMPAT_SUNOS */

struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		    p->p_pid, uap->addr, uap->len, prot,
		    flags, uap->fd, (vm_offset_t) uap->pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned. Size is implicitly rounded
	 * to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after the
	 * end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, (vm_offset_t) uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
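/*
 * Old (4.3BSD-compatible) mmap system call: translate the historic
 * argument layout and OMAP_* flag bits into a struct mmap_args and
 * hand the request to mmap() above.
 */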
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif				/* COMPAT_43 */


struct msync_args {
	caddr_t addr;
	int len;
};
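/*
 * Flush modified pages in the given range back to their backing store
 * and invalidate any cached copies.  A length of zero means "the map
 * entry containing addr" (see the XXX comment below).
 */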
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW | MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		    p->p_pid, uap->addr, uap->len);
#endif
	if (((int) uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t) uap->addr;
	size = (vm_size_t) uap->len;
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps so we approximate by flushing
	 * the range of the map entry containing addr. This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		    addr, addr + size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement Sun's
	 * MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure consistency with
	 * the file system cache.  Otherwise, we could pass this in to
	 * implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}

struct munmap_args {
	caddr_t addr;
	int len;
};
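/*
 * Remove a mapping.  The entire range must currently be allocated in
 * the process address space.
 */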
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		    p->p_pid, uap->addr, uap->len);
#endif

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return (0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

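/*
 * Note that a file descriptor is no longer mapped; the mapping itself
 * is left in place (see the XXX comment below).
 */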
void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

struct mprotect_args {
	caddr_t addr;
	int len;
	int prot;
};
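/*
 * Change the protection of the pages in the given range.
 */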
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		    p->p_pid, uap->addr, uap->len, uap->prot);
#endif

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return (EINVAL);
	size = (vm_size_t) uap->len;
	prot = uap->prot & VM_PROT_ALL;

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
		FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

struct madvise_args {
	caddr_t addr;
	int len;
	int behav;
};

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mincore_args {
	caddr_t addr;
	int len;
	char *vec;
};

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mlock_args {
	caddr_t addr;
	size_t len;
};
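/*
 * Wire the pages of the given range into memory, subject to the
 * system-wide wired-page limit and either RLIMIT_MEMLOCK (when
 * pmap_wired_count is available) or a super-user check.
 */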
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		    p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t) uap->len);
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

struct munlock_args {
	caddr_t addr;
	size_t len;
};
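/*
 * Unwire the pages of the given range.  Without pmap_wired_count this
 * is restricted to the super-user.
 */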
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		    p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif
	size = round_page((vm_size_t) uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void) vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup gain
	 * a reference to ensure continued existence of the object. (XXX the
	 * exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	if (handle && object == NULL) {
		panic("vm_mmap: vm_object_lookup failed");
	}
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects. Loses the reference gained
		 * by vm_pager_allocate. Note that object will be NULL when
		 * handle == NULL; this is ok since vm_allocate_with_pager has
		 * made sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			    curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file. Distinguish between character special and
	 * regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
		    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained by
		 * vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we gained an
		 * additional reference ensuring the object will continue to
		 * exist.  If the call failed then the deallocate call below
		 * will terminate the object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			    vp, pager);
#endif
		/*
		 * Map it directly. Allows modifications to go out to the
		 * vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
			    fitit, pager,
			    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way of
			 * ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);

			if (map->pmap)
				pmap_object_init_pt(map->pmap, *addr, object, foff, size);
		}
		/*
		 * Copy-on-write of file.  Two flavors. MAP_COPY is true COW,
		 * you essentially get a snapshot of the region at the time of
		 * mapping.  MAP_PRIVATE means only that your changes are not
		 * reflected back to the object. Changes made by others will
		 * be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t) 0,
			    addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
			    VM_MIN_ADDRESS + size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
			    TRUE, pager,
			    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX) MAP_PRIVATE implies that we see changes made
			 * by others.  To ensure that, we need to guarantee
			 * that no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
			    FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX) My oh my, this only gets worse... Force
			 * creation of a shadow object so that vm_map_fork
			 * will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
				    &tentry, &tobject, &toffset,
				    &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX) Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			if (map->pmap)
				pmap_object_init_pt(map->pmap, *addr, object, foff, size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			    curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL). If maxprot is
	 * different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr + size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr + size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}