xref: /freebsd/sys/vm/vm_mmap.c (revision afe61c15161c324a7af299a9b8457aba5afc92db)
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_pager.h>
#include <vm/vm_prot.h>

#ifdef DEBUG
int mmapdebug = 0;
#define MDB_FOLLOW	0x01
#define MDB_SYNC	0x02
#define MDB_MAPIT	0x04
#endif

struct sbrk_args {
	int	incr;
};
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct sstk_args {
	int	incr;
};
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
struct getpagesize_args {
	int	dummy;
};
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */

struct mmap_args {
	caddr_t	addr;
	size_t	len;
	int	prot;
	int	flags;
	int	fd;
	long	pad;
	off_t	pos;
};

#ifdef COMPAT_43
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
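	/*
	 * cvtbsdprot[] is indexed by the low three bits of the old-style
	 * protection value (exec, write, read, from the least significant
	 * bit up) and yields the equivalent new PROT_* combination;
	 * e.g. an old prot of 5 (exec|read) becomes PROT_EXEC|PROT_READ.
	 */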
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

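	/*
	 * Translate the old OMAP_* flag bits into their modern MAP_*
	 * equivalents; OMAP_SHARED selects MAP_SHARED, otherwise the
	 * mapping defaults to MAP_PRIVATE.
	 */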
	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot&0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
		       p->p_pid, uap->addr, uap->len, prot,
		       flags, uap->fd, (vm_offset_t)uap->pos);
#endif
	/*
	 * Address (if FIXED) must be page aligned.
	 * Size is implicitly rounded to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);
	size = (vm_size_t) round_page(uap->len);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr > addr + size)
			return (EINVAL);
	}
	/*
	 * XXX if no hint provided for a non-fixed mapping place it after
	 * the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation.
		 * Obtain vnode and make sure it is of appropriate type.
		 */
		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon
		 * memory (ala SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination?
			 * What if proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t)vp;
		}
	}
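	/*
	 * All argument checking is done; vm_mmap() does the real work.
	 * On success, hand the address actually mapped back to the user.
	 */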
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, (vm_offset_t)uap->pos);
	if (error == 0)
		*retval = (int)addr;
	return (error);
}

struct msync_args {
	caddr_t	addr;
	int	len;
};
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;
	int rv;
	boolean_t syncio, invalidate;

#ifdef DEBUG
	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
		printf("msync(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	addr = (vm_offset_t)uap->addr;
	size = (vm_size_t)uap->len;
	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages with the region containing addr".  Unfortunately, we
	 * don't really keep track of individual mmaps so we approximate
	 * by flushing the range of the map entry containing addr.
	 * This can be incorrect if the region splits or is coalesced
	 * with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}
#ifdef DEBUG
	if (mmapdebug & MDB_SYNC)
		printf("msync: cleaning/flushing address range [%x-%x)\n",
		       addr, addr+size);
#endif
	/*
	 * Could pass this in as a third flag argument to implement
	 * Sun's MS_ASYNC.
	 */
	syncio = TRUE;
	/*
	 * XXX bummer, gotta flush all cached pages to ensure
	 * consistency with the file system cache.  Otherwise, we could
	 * pass this in to implement Sun's MS_INVALIDATE.
	 */
	invalidate = TRUE;
	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}
	return (0);
}

struct munmap_args {
	caddr_t	addr;
	int	len;
};
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	vm_map_t map;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmap(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif

	addr = (vm_offset_t) uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t) round_page(uap->len);
	if (size == 0)
		return(0);
	/*
	 * Check for illegal addresses.  Watch out for address wrap...
	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr > addr + size)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return(EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr+size);
	return(0);
}

void
munmapfd(fd)
	int fd;
{
#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
#endif

	/*
	 * XXX should vm_deallocate any regions mapped to this file
	 */
	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

struct mprotect_args {
	caddr_t	addr;
	int	len;
	int	prot;
};
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	register vm_prot_t prot;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mprotect(%d): addr %x len %x prot %d\n",
		       p->p_pid, uap->addr, uap->len, uap->prot);
#endif

	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->len < 0)
		return(EINVAL);
	size = (vm_size_t)uap->len;
	prot = uap->prot & VM_PROT_ALL;

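	/*
	 * vm_map_protect() reports its result as a KERN_* code;
	 * translate it into an errno value.
	 */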
	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
	    FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

struct madvise_args {
	caddr_t	addr;
	int	len;
	int	behav;
};
/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mincore_args {
	caddr_t	addr;
	int	len;
	char	*vec;
};
/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

struct mlock_args {
	caddr_t	addr;
	size_t	len;
};
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;
	extern int vm_page_max_wired;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("mlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
	size = round_page((vm_size_t)uap->len);
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);
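	/*
	 * If the pmap can report per-process wired page counts, enforce
	 * the RLIMIT_MEMLOCK resource limit; otherwise fall back to
	 * restricting mlock() to the superuser.
	 */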
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

struct munlock_args {
	caddr_t	addr;
	size_t	len;
};
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size;
	int error;

#ifdef DEBUG
	if (mmapdebug & MDB_FOLLOW)
		printf("munlock(%d): addr %x len %x\n",
		       p->p_pid, uap->addr, uap->len);
#endif
	addr = (vm_offset_t)uap->addr;
	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
		return (EINVAL);
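	/*
	 * As in mlock(), require superuser privilege when the pmap
	 * cannot report wired page counts.
	 */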
#ifndef pmap_wired_count
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#endif
	size = round_page((vm_size_t)uap->len);

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

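	/*
	 * A MAP_FIXED request replaces whatever is currently mapped in
	 * the range, so tear that down first.  Otherwise the address is
	 * only a hint: round it to a page boundary and let the VM system
	 * pick the final location ("fitit").
	 */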
	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL;
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
						    fitit, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
					 addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
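			/*
			 * Build a throwaway map just big enough to hold the
			 * pager-backed object, then vm_map_copy() it into the
			 * range reserved above; the copy is what gives the
			 * mapping its private/copy-on-write semantics.
			 */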
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
					     VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
						    TRUE, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that, we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
					 FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
					      &tentry, &tobject, &toffset,
					      &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			vm_map_deallocate(tmap);
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}