xref: /freebsd/sys/vm/vm_mmap.c (revision 8e537d168674d6b65869f73c20813001af875738)
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
 * $Id: vm_mmap.c,v 1.49 1996/07/30 03:08:12 dyson Exp $
 */

/*
 * Mapped file (mmap) interface to VM
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/conf.h>
#include <sys/vmmeter.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/vm_inherit.h>
#include <vm/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>

#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#ifndef _SYS_SYSPROTO_H_
struct sstk_args {
	int incr;
};
#endif

/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	/* Not yet implemented */
	return (EOPNOTSUPP);
}

#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct getpagesize_args {
	int dummy;
};
#endif

/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	*retval = PAGE_SIZE;
	return (0);
}
#endif				/* COMPAT_43 || COMPAT_SUNOS */

#ifndef _SYS_SYSPROTO_H_
struct mmap_args {
	caddr_t addr;
	size_t len;
	int prot;
	int flags;
	int fd;
	long pad;
	off_t pos;
};
#endif

int
mmap(p, uap, retval)
	struct proc *p;
	register struct mmap_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct vnode *vp;
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_prot_t prot, maxprot;
	caddr_t handle;
	int flags, error;

	prot = uap->prot & VM_PROT_ALL;
	flags = uap->flags;
	/*
	 * Address (if FIXED) must be page aligned. Size is implicitly rounded
	 * to a page boundary.
	 */
	addr = (vm_offset_t) uap->addr;
	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
		return (EINVAL);

	/*
	 * Round page if not already disallowed by above test
	 * XXX: Is there any point in the MAP_FIXED align requirement above?
	 */
	size = uap->len;
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
			return (EINVAL);
#ifndef i386
		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
			return (EINVAL);
#endif
		if (addr + size < addr)
			return (EINVAL);
	}
	/*
	 * XXX if no hint is provided for a non-fixed mapping, place it after
	 * the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable location.
	 */
	if (addr == 0 && (flags & MAP_FIXED) == 0)
		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.
		 */
		handle = NULL;
		maxprot = VM_PROT_ALL;
	} else {
		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
			return (EBADF);
		if (fp->f_type != DTYPE_VNODE)
			return (EINVAL);
		vp = (struct vnode *) fp->f_data;
		if (vp->v_type != VREG && vp->v_type != VCHR)
			return (EINVAL);
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
			handle = NULL;
			maxprot = VM_PROT_ALL;
			flags |= MAP_ANON;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ)
				return (EACCES);
			if (flags & MAP_SHARED) {
				if (fp->f_flag & FWRITE)
					maxprot |= VM_PROT_WRITE;
				else if (prot & PROT_WRITE)
					return (EACCES);
			} else
				maxprot |= VM_PROT_WRITE;
			handle = (caddr_t) vp;
		}
	}
	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
	    flags, handle, uap->pos);
	if (error == 0)
		*retval = (int) addr;
	return (error);
}

#ifdef COMPAT_43
#ifndef _SYS_SYSPROTO_H_
struct ommap_args {
	caddr_t addr;
	int len;
	int prot;
	int flags;
	int fd;
	long pos;
};
#endif
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	struct mmap_args nargs;
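	/*
	 * Translate the old (COMPAT_43) mmap protection bits, indexed here
	 * as X = 1, W = 2, R = 4, into the current PROT_* values.
	 */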
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC | PROT_WRITE,
		PROT_READ,
		PROT_EXEC | PROT_READ,
		PROT_WRITE | PROT_READ,
		PROT_EXEC | PROT_WRITE | PROT_READ,
	};

#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = 0;
	if (uap->flags & OMAP_ANON)
		nargs.flags |= MAP_ANON;
	if (uap->flags & OMAP_COPY)
		nargs.flags |= MAP_COPY;
	if (uap->flags & OMAP_SHARED)
		nargs.flags |= MAP_SHARED;
	else
		nargs.flags |= MAP_PRIVATE;
	if (uap->flags & OMAP_FIXED)
		nargs.flags |= MAP_FIXED;
	if (uap->flags & OMAP_INHERIT)
		nargs.flags |= MAP_INHERIT;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif				/* COMPAT_43 */


#ifndef _SYS_SYSPROTO_H_
struct msync_args {
	caddr_t addr;
	int len;
	int flags;
};
#endif
int
msync(p, uap, retval)
	struct proc *p;
	struct msync_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int flags;
	vm_map_t map;
	int rv;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	flags = uap->flags;

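	/*
	 * Truncate addr to a page boundary and round the size up so the
	 * request still covers the original byte range.
	 */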
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

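	/*
	 * MS_ASYNC and MS_INVALIDATE may not be specified together.
	 */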
	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/*
	 * XXX Gak!  If size is zero we are supposed to sync "all modified
	 * pages within the region containing addr".  Unfortunately, we don't
	 * really keep track of individual mmaps, so we approximate by flushing
	 * the range of the map entry containing addr.  This can be incorrect
	 * if the region splits or is coalesced with a neighbor.
	 */
	if (size == 0) {
		vm_map_entry_t entry;

		vm_map_lock_read(map);
		rv = vm_map_lookup_entry(map, addr, &entry);
		vm_map_unlock_read(map);
		if (rv == FALSE)
			return (EINVAL);
		addr = entry->start;
		size = entry->end - entry->start;
	}

	/*
	 * Clean the pages and interpret the return value.
	 */
	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
	    (flags & MS_INVALIDATE) != 0);

	switch (rv) {
	case KERN_SUCCESS:
		break;
	case KERN_INVALID_ADDRESS:
		return (EINVAL);	/* Sun returns ENOMEM? */
	case KERN_FAILURE:
		return (EIO);
	default:
		return (EINVAL);
	}

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct munmap_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munmap(p, uap, retval)
	register struct proc *p;
	register struct munmap_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	vm_map_t map;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	if (size == 0)
		return (0);

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (addr + size < addr)
		return (EINVAL);
	map = &p->p_vmspace->vm_map;
	/*
	 * Make sure entire range is allocated.
	 */
	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
		return (EINVAL);
	/* returns nothing but KERN_SUCCESS anyway */
	(void) vm_map_remove(map, addr, addr + size);
	return (0);
}

void
munmapfd(p, fd)
	struct proc *p;
	int fd;
{
	/*
	 * XXX should unmap any regions mapped to this file
	 */
	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
}

#ifndef _SYS_SYSPROTO_H_
struct mprotect_args {
	caddr_t addr;
	size_t len;
	int prot;
};
#endif
int
mprotect(p, uap, retval)
	struct proc *p;
	struct mprotect_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_prot_t prot;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;
	prot = uap->prot & VM_PROT_ALL;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
		FALSE)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct minherit_args {
	caddr_t addr;
	size_t len;
	int inherit;
};
#endif
int
minherit(p, uap, retval)
	struct proc *p;
	struct minherit_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	register vm_inherit_t inherit;

	addr = (vm_offset_t)uap->addr;
	size = uap->len;
	inherit = uap->inherit;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);
	if (addr + size < addr)
		return(EINVAL);

	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
	    inherit)) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	}
	return (EINVAL);
}

#ifndef _SYS_SYSPROTO_H_
struct madvise_args {
	caddr_t addr;
	size_t len;
	int behav;
};
#endif

/* ARGSUSED */
int
madvise(p, uap, retval)
	struct proc *p;
	struct madvise_args *uap;
	int *retval;
{
	vm_map_t map;
	pmap_t pmap;
	vm_offset_t start, end;
	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (VM_MAXUSER_ADDRESS > 0 &&
		((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
		return (EINVAL);
#ifndef i386
	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
		return (EINVAL);
#endif
	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
		return (EINVAL);

	/*
	 * Since this routine is only advisory, we default to conservative
	 * behavior.
	 */
	start = trunc_page((vm_offset_t) uap->addr);
	end = round_page((vm_offset_t) uap->addr + uap->len);

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_madvise(map, pmap, start, end, uap->behav);

	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mincore_args {
	caddr_t addr;
	size_t len;
	char *vec;
};
#endif

/* ARGSUSED */
int
mincore(p, uap, retval)
	struct proc *p;
	struct mincore_args *uap;
	int *retval;
{
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
		return (EINVAL);
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = &p->p_vmspace->vm_pmap;

	vm_map_lock(map);

	/*
	 * Not needed here
	 */
#if 0
	VM_MAP_RANGE_CHECK(map, addr, end);
#endif

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current process's address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for(current = entry;
		(current != &map->header) && (current->start < end);
		current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->is_a_map || current->is_sub_map ||
			current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while(addr < cend) {
			/*
			 * Check the pmap first; it is likely faster, and it
			 * can also tell whether we are the one referencing
			 * or modifying the page.
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (!mincoreinfo) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;
				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);
				m = vm_page_lookup(current->object.vm_object,
					pindex);
				/*
				 * if the page is resident, then gather information about
				 * it.
				 */
				if (m) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
						pmap_is_modified(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
						pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
				}
			}

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while((lastvecindex + 1) < vecindex) {
				error = subyte( vec + lastvecindex, 0);
				if (error) {
					vm_map_unlock(map);
					return (EFAULT);
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte( vec + vecindex, mincoreinfo);
			if (error) {
				vm_map_unlock(map);
				return (EFAULT);
			}
			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while((lastvecindex + 1) < vecindex) {
		error = subyte( vec + lastvecindex, 0);
		if (error) {
			vm_map_unlock(map);
			return (EFAULT);
		}
		++lastvecindex;
	}

	vm_map_unlock(map);
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct mlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
mlock(p, uap, retval)
	struct proc *p;
	struct mlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

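	/*
	 * Refuse the request if wiring these pages would push the system
	 * past its global limit on wired pages.
	 */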
	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
		return (EAGAIN);

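	/*
	 * If the pmap layer can report a wired-page count, enforce the
	 * per-process RLIMIT_MEMLOCK limit; otherwise fall back to
	 * requiring superuser privilege.
	 */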
#ifdef pmap_wired_count
	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
		return (EAGAIN);
#else
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

#ifndef _SYS_SYSPROTO_H_
struct munlock_args {
	caddr_t addr;
	size_t len;
};
#endif
int
munlock(p, uap, retval)
	struct proc *p;
	struct munlock_args *uap;
	int *retval;
{
	vm_offset_t addr;
	vm_size_t size, pageoff;
	int error;

	addr = (vm_offset_t) uap->addr;
	size = uap->len;

	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size += pageoff;
	size = (vm_size_t) round_page(size);

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

#ifndef pmap_wired_count
	error = suser(p->p_ucred, &p->p_acflag);
	if (error)
		return (error);
#endif

	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
	return (error == KERN_SUCCESS ? 0 : ENOMEM);
}

/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_ooffset_t foff;
{
	boolean_t fitit;
	vm_object_t object, object2;
	struct vnode *vp = NULL;
	objtype_t type;
	int rv = KERN_SUCCESS;
	vm_ooffset_t objsize;
	int docow;
	struct proc *p = curproc;

	if (size == 0)
		return (0);

	objsize = size = round_page(size);

	/*
	 * We currently can only deal with page-aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec) and non-aligned offsets will
	 * cause pmap inconsistencies, so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK)
		return (EINVAL);

	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr))
			return (EINVAL);
		fitit = FALSE;
		(void) vm_map_remove(map, *addr, *addr + size);
	}

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		type = OBJT_SWAP;
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle == 0)
			foff = 0;
	} else {
		vp = (struct vnode *) handle;
		if (vp->v_type == VCHR) {
			type = OBJT_DEVICE;
			handle = (caddr_t) vp->v_rdev;
		} else {
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
			if (error)
				return (error);
			objsize = round_page(vat.va_size);
			type = OBJT_VNODE;
		}
	}
	object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff);
	if (object == NULL)
		return (type == OBJT_DEVICE ? EINVAL : ENOMEM);

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

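	/*
	 * Private file mappings are copy-on-write.  If the requested
	 * mapping extends past the backing object, interpose a default
	 * (anonymous) shadow object in front of it; otherwise simply
	 * note that a private copy will be needed.
	 */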
	object2 = NULL;
	docow = 0;
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow = MAP_COPY_ON_WRITE;
		if (objsize < size) {
			object2 = vm_object_allocate( OBJT_DEFAULT,
				OFF_TO_IDX(size - (foff & ~PAGE_MASK)));
			object2->backing_object = object;
			object2->backing_object_offset = foff;
			TAILQ_INSERT_TAIL(&object->shadow_head,
				object2, shadow_list);
			++object->shadow_count;
		} else {
			docow |= MAP_COPY_NEEDED;
		}
	}

	if (object2)
		rv = vm_map_find(map, object2, 0, addr, size, fitit,
			prot, maxprot, docow);
	else
		rv = vm_map_find(map, object, foff, addr, size, fitit,
			prot, maxprot, docow);


	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		if (object2)
			vm_object_deallocate(object2);
		else
			vm_object_deallocate(object);
		goto out;
	}

	/*
	 * "Pre-fault" resident pages.
	 */
	if ((type == OBJT_VNODE) && (map->pmap != NULL)) {
		pmap_object_init_pt(map->pmap, *addr,
			object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1);
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
988