xref: /freebsd/sys/vm/vm_mmap.c (revision bcd92649c9952c9c9e8845dbd34276a60dd16664)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
39  *
40  *	@(#)vm_mmap.c	8.4 (Berkeley) 1/12/94
41  * $Id: vm_mmap.c,v 1.52 1996/10/24 02:56:23 dyson Exp $
42  */
43 
44 /*
45  * Mapped file (mmap) interface to VM
46  */
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/sysproto.h>
51 #include <sys/filedesc.h>
52 #include <sys/resourcevar.h>
53 #include <sys/proc.h>
54 #include <sys/vnode.h>
55 #include <sys/file.h>
56 #include <sys/mman.h>
57 #include <sys/conf.h>
58 #include <sys/vmmeter.h>
59 
60 #include <miscfs/specfs/specdev.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_param.h>
64 #include <vm/vm_prot.h>
65 #include <vm/vm_inherit.h>
66 #include <vm/lock.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_pager.h>
71 #include <vm/vm_pageout.h>
72 #include <vm/vm_extern.h>
73 #include <vm/vm_kern.h>
74 #include <vm/vm_page.h>
75 
76 #ifndef _SYS_SYSPROTO_H_
77 struct sbrk_args {
78 	int incr;
79 };
80 #endif
81 
82 /* ARGSUSED */
83 int
84 sbrk(p, uap, retval)
85 	struct proc *p;
86 	struct sbrk_args *uap;
87 	int *retval;
88 {
89 
90 	/* Not yet implemented */
91 	return (EOPNOTSUPP);
92 }
93 
94 #ifndef _SYS_SYSPROTO_H_
95 struct sstk_args {
96 	int incr;
97 };
98 #endif
99 
100 /* ARGSUSED */
101 int
102 sstk(p, uap, retval)
103 	struct proc *p;
104 	struct sstk_args *uap;
105 	int *retval;
106 {
107 
108 	/* Not yet implemented */
109 	return (EOPNOTSUPP);
110 }
111 
112 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
113 #ifndef _SYS_SYSPROTO_H_
114 struct getpagesize_args {
115 	int dummy;
116 };
117 #endif
118 
119 /* ARGSUSED */
120 int
121 ogetpagesize(p, uap, retval)
122 	struct proc *p;
123 	struct getpagesize_args *uap;
124 	int *retval;
125 {
126 
127 	*retval = PAGE_SIZE;
128 	return (0);
129 }
130 #endif				/* COMPAT_43 || COMPAT_SUNOS */
131 
132 #ifndef _SYS_SYSPROTO_H_
133 struct mmap_args {
134 	caddr_t addr;
135 	size_t len;
136 	int prot;
137 	int flags;
138 	int fd;
139 	long pad;
140 	off_t pos;
141 };
142 #endif
143 
144 int
145 mmap(p, uap, retval)
146 	struct proc *p;
147 	register struct mmap_args *uap;
148 	int *retval;
149 {
150 	register struct filedesc *fdp = p->p_fd;
151 	register struct file *fp;
152 	struct vnode *vp;
153 	vm_offset_t addr;
154 	vm_size_t size, pageoff;
155 	vm_prot_t prot, maxprot;
156 	caddr_t handle;
157 	int flags, error;
158 
159 	prot = uap->prot & VM_PROT_ALL;
160 	flags = uap->flags;
161 	/*
162 	 * Address (if FIXED) must be page aligned. Size is implicitly rounded
163 	 * to a page boundary.
164 	 */
165 	addr = (vm_offset_t) uap->addr;
166 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
167 	    (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
168 		return (EINVAL);
169 
170 	/*
171 	 * Round page if not already disallowed by above test
172 	 * XXX: Is there any point in the MAP_FIXED align requirement above?
173 	 */
174 	size = uap->len;
175 	pageoff = (addr & PAGE_MASK);
176 	addr -= pageoff;
177 	size += pageoff;
178 	size = (vm_size_t) round_page(size);
179 
180 	/*
181 	 * Check for illegal addresses.  Watch out for address wrap... Note
182 	 * that VM_*_ADDRESS are not constants due to casts (argh).
183 	 */
184 	if (flags & MAP_FIXED) {
185 		if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
186 			return (EINVAL);
187 #ifndef i386
188 		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
189 			return (EINVAL);
190 #endif
191 		if (addr + size < addr)
192 			return (EINVAL);
193 	}
194 	/*
195 	 * XXX if no hint is provided for a non-fixed mapping, place it after
196 	 * the end of the largest possible heap.
197 	 *
198 	 * There should really be a pmap call to determine a reasonable location.
199 	 */
200 	if (addr == 0 && (flags & MAP_FIXED) == 0)
201 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
202 	if (flags & MAP_ANON) {
203 		/*
204 		 * Mapping blank space is trivial.
205 		 */
206 		handle = NULL;
207 		maxprot = VM_PROT_ALL;
208 	} else {
209 		/*
210 		 * Mapping a file: get fp for validation.  Obtain the vnode and
211 		 * make sure it is of an appropriate type.
212 		 */
213 		if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
214 		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
215 			return (EBADF);
216 		if (fp->f_type != DTYPE_VNODE)
217 			return (EINVAL);
218 		vp = (struct vnode *) fp->f_data;
219 		if (vp->v_type != VREG && vp->v_type != VCHR)
220 			return (EINVAL);
221 		/*
222 		 * XXX hack to handle use of /dev/zero to map anon memory (a la
223 		 * SunOS).
224 		 */
225 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
226 			handle = NULL;
227 			maxprot = VM_PROT_ALL;
228 			flags |= MAP_ANON;
229 		} else {
230 			/*
231 			 * Ensure that file and memory protections are
232 			 * compatible.  Note that we only worry about
233 			 * writability if mapping is shared; in this case,
234 			 * current and max prot are dictated by the open file.
235 			 * XXX use the vnode instead?  Problem is: what
236 			 * credentials do we use for determination? What if
237 			 * proc does a setuid?
238 			 */
239 			maxprot = VM_PROT_EXECUTE;	/* ??? */
240 			if (fp->f_flag & FREAD)
241 				maxprot |= VM_PROT_READ;
242 			else if (prot & PROT_READ)
243 				return (EACCES);
244 			if (flags & MAP_SHARED) {
245 				if (fp->f_flag & FWRITE)
246 					maxprot |= VM_PROT_WRITE;
247 				else if (prot & PROT_WRITE)
248 					return (EACCES);
249 			} else
250 				maxprot |= VM_PROT_WRITE;
251 			handle = (caddr_t) vp;
252 		}
253 	}
254 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
255 	    flags, handle, uap->pos);
256 	if (error == 0)
257 		*retval = (int) addr;
258 	return (error);
259 }
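
/*
 * Usage sketch (illustrative only): a user process reaches this handler
 * through the mmap(2) stub.  An anonymous mapping must pass fd == -1,
 * matching the MAP_ANON check above, while a file-backed mapping
 * supplies an open descriptor instead.
 *
 *	caddr_t p;
 *
 *	p = mmap(0, 8192, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, (off_t)0);
 *	if (p == (caddr_t)-1)
 *		err(1, "mmap");
 *
 * With a zero hint and no MAP_FIXED, the code above places the mapping
 * just past the largest possible data segment.
 */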
260 
261 #ifdef COMPAT_43
262 #ifndef _SYS_SYSPROTO_H_
263 struct ommap_args {
264 	caddr_t addr;
265 	int len;
266 	int prot;
267 	int flags;
268 	int fd;
269 	long pos;
270 };
271 #endif
272 int
273 ommap(p, uap, retval)
274 	struct proc *p;
275 	register struct ommap_args *uap;
276 	int *retval;
277 {
278 	struct mmap_args nargs;
279 	static const char cvtbsdprot[8] = {
280 		0,
281 		PROT_EXEC,
282 		PROT_WRITE,
283 		PROT_EXEC | PROT_WRITE,
284 		PROT_READ,
285 		PROT_EXEC | PROT_READ,
286 		PROT_WRITE | PROT_READ,
287 		PROT_EXEC | PROT_WRITE | PROT_READ,
288 	};
289 
290 #define	OMAP_ANON	0x0002
291 #define	OMAP_COPY	0x0020
292 #define	OMAP_SHARED	0x0010
293 #define	OMAP_FIXED	0x0100
294 #define	OMAP_INHERIT	0x0800
295 
296 	nargs.addr = uap->addr;
297 	nargs.len = uap->len;
298 	nargs.prot = cvtbsdprot[uap->prot & 0x7];
299 	nargs.flags = 0;
300 	if (uap->flags & OMAP_ANON)
301 		nargs.flags |= MAP_ANON;
302 	if (uap->flags & OMAP_COPY)
303 		nargs.flags |= MAP_COPY;
304 	if (uap->flags & OMAP_SHARED)
305 		nargs.flags |= MAP_SHARED;
306 	else
307 		nargs.flags |= MAP_PRIVATE;
308 	if (uap->flags & OMAP_FIXED)
309 		nargs.flags |= MAP_FIXED;
310 	if (uap->flags & OMAP_INHERIT)
311 		nargs.flags |= MAP_INHERIT;
312 	nargs.fd = uap->fd;
313 	nargs.pos = uap->pos;
314 	return (mmap(p, &nargs, retval));
315 }
316 #endif				/* COMPAT_43 */
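
/*
 * Worked example for the cvtbsdprot[] table above: the old 4.3BSD
 * encoding uses bit 0 for execute, bit 1 for write and bit 2 for read.
 * An old prot value of 5 (binary 101, read + execute) indexes entry 5
 * and is rewritten as PROT_EXEC | PROT_READ before calling the new
 * mmap() above.
 */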
317 
318 
319 #ifndef _SYS_SYSPROTO_H_
320 struct msync_args {
321 	caddr_t addr;
322 	int len;
323 	int flags;
324 };
325 #endif
326 int
327 msync(p, uap, retval)
328 	struct proc *p;
329 	struct msync_args *uap;
330 	int *retval;
331 {
332 	vm_offset_t addr;
333 	vm_size_t size, pageoff;
334 	int flags;
335 	vm_map_t map;
336 	int rv;
337 
338 	addr = (vm_offset_t) uap->addr;
339 	size = uap->len;
340 	flags = uap->flags;
341 
342 	pageoff = (addr & PAGE_MASK);
343 	addr -= pageoff;
344 	size += pageoff;
345 	size = (vm_size_t) round_page(size);
346 	if (addr + size < addr)
347 		return(EINVAL);
348 
349 	if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
350 		return (EINVAL);
351 
352 	map = &p->p_vmspace->vm_map;
353 
354 	/*
355 	 * XXX Gak!  If size is zero we are supposed to sync "all modified
356 	 * pages within the region containing addr".  Unfortunately, we don't
357 	 * really keep track of individual mmaps so we approximate by flushing
358 	 * the range of the map entry containing addr. This can be incorrect
359 	 * if the region splits or is coalesced with a neighbor.
360 	 */
361 	if (size == 0) {
362 		vm_map_entry_t entry;
363 
364 		vm_map_lock_read(map);
365 		rv = vm_map_lookup_entry(map, addr, &entry);
366 		vm_map_unlock_read(map);
367 		if (rv == FALSE)
368 			return (EINVAL);
369 		addr = entry->start;
370 		size = entry->end - entry->start;
371 	}
372 
373 	/*
374 	 * Clean the pages and interpret the return value.
375 	 */
376 	rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
377 	    (flags & MS_INVALIDATE) != 0);
378 
379 	switch (rv) {
380 	case KERN_SUCCESS:
381 		break;
382 	case KERN_INVALID_ADDRESS:
383 		return (EINVAL);	/* Sun returns ENOMEM? */
384 	case KERN_FAILURE:
385 		return (EIO);
386 	default:
387 		return (EINVAL);
388 	}
389 
390 	return (0);
391 }
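
/*
 * Usage sketch (illustrative only): flushing a file-backed region and
 * waiting for the pages to be cleaned.  Leaving MS_ASYNC clear makes
 * vm_map_clean() above wait; MS_ASYNC together with MS_INVALIDATE is
 * rejected by the check above.
 *
 *	if (msync(addr, len, 0) == -1)
 *		warn("msync");
 *
 * addr and len are placeholders for the caller's own mapping.
 */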
392 
393 #ifndef _SYS_SYSPROTO_H_
394 struct munmap_args {
395 	caddr_t addr;
396 	size_t len;
397 };
398 #endif
399 int
400 munmap(p, uap, retval)
401 	register struct proc *p;
402 	register struct munmap_args *uap;
403 	int *retval;
404 {
405 	vm_offset_t addr;
406 	vm_size_t size, pageoff;
407 	vm_map_t map;
408 
409 	addr = (vm_offset_t) uap->addr;
410 	size = uap->len;
411 
412 	pageoff = (addr & PAGE_MASK);
413 	addr -= pageoff;
414 	size += pageoff;
415 	size = (vm_size_t) round_page(size);
416 	if (addr + size < addr)
417 		return(EINVAL);
418 
419 	if (size == 0)
420 		return (0);
421 
422 	/*
423 	 * Check for illegal addresses.  Watch out for address wrap... Note
424 	 * that VM_*_ADDRESS are not constants due to casts (argh).
425 	 */
426 	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
427 		return (EINVAL);
428 #ifndef i386
429 	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
430 		return (EINVAL);
431 #endif
432 	if (addr + size < addr)
433 		return (EINVAL);
434 	map = &p->p_vmspace->vm_map;
435 	/*
436 	 * Make sure entire range is allocated.
437 	 */
438 	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
439 		return (EINVAL);
440 	/* returns nothing but KERN_SUCCESS anyway */
441 	(void) vm_map_remove(map, addr, addr + size);
442 	return (0);
443 }
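
/*
 * The rounding arithmetic above widens an unaligned request to whole
 * pages.  For example, with 4K pages, addr = 0x10234 and len = 0x100
 * give pageoff = 0x234, so addr becomes 0x10000 and size becomes
 * round_page(0x334) = 0x1000: the single page at 0x10000 is removed.
 */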
444 
445 void
446 munmapfd(p, fd)
447 	struct proc *p;
448 	int fd;
449 {
450 	/*
451 	 * XXX should unmap any regions mapped to this file
452 	 */
453 	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
454 }
455 
456 #ifndef _SYS_SYSPROTO_H_
457 struct mprotect_args {
458 	caddr_t addr;
459 	size_t len;
460 	int prot;
461 };
462 #endif
463 int
464 mprotect(p, uap, retval)
465 	struct proc *p;
466 	struct mprotect_args *uap;
467 	int *retval;
468 {
469 	vm_offset_t addr;
470 	vm_size_t size, pageoff;
471 	register vm_prot_t prot;
472 
473 	addr = (vm_offset_t) uap->addr;
474 	size = uap->len;
475 	prot = uap->prot & VM_PROT_ALL;
476 
477 	pageoff = (addr & PAGE_MASK);
478 	addr -= pageoff;
479 	size += pageoff;
480 	size = (vm_size_t) round_page(size);
481 	if (addr + size < addr)
482 		return(EINVAL);
483 
484 	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
485 		FALSE)) {
486 	case KERN_SUCCESS:
487 		return (0);
488 	case KERN_PROTECTION_FAILURE:
489 		return (EACCES);
490 	}
491 	return (EINVAL);
492 }
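
/*
 * Usage sketch (illustrative only): dropping write permission from a
 * region.  Asking for more than the maximum protection established
 * when the region was mapped is what yields the EACCES case above.
 *
 *	if (mprotect(addr, len, PROT_READ) == -1)
 *		warn("mprotect");
 *
 * addr and len are placeholders for the caller's own mapping.
 */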
493 
494 #ifndef _SYS_SYSPROTO_H_
495 struct minherit_args {
496 	caddr_t addr;
497 	size_t len;
498 	int inherit;
499 };
500 #endif
501 int
502 minherit(p, uap, retval)
503 	struct proc *p;
504 	struct minherit_args *uap;
505 	int *retval;
506 {
507 	vm_offset_t addr;
508 	vm_size_t size, pageoff;
509 	register vm_inherit_t inherit;
510 
511 	addr = (vm_offset_t)uap->addr;
512 	size = uap->len;
513 	inherit = uap->inherit;
514 
515 	pageoff = (addr & PAGE_MASK);
516 	addr -= pageoff;
517 	size += pageoff;
518 	size = (vm_size_t) round_page(size);
519 	if (addr + size < addr)
520 		return(EINVAL);
521 
522 	switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
523 	    inherit)) {
524 	case KERN_SUCCESS:
525 		return (0);
526 	case KERN_PROTECTION_FAILURE:
527 		return (EACCES);
528 	}
529 	return (EINVAL);
530 }
531 
532 #ifndef _SYS_SYSPROTO_H_
533 struct madvise_args {
534 	caddr_t addr;
535 	size_t len;
536 	int behav;
537 };
538 #endif
539 
540 /* ARGSUSED */
541 int
542 madvise(p, uap, retval)
543 	struct proc *p;
544 	struct madvise_args *uap;
545 	int *retval;
546 {
547 	vm_map_t map;
548 	pmap_t pmap;
549 	vm_offset_t start, end;
550 	/*
551 	 * Check for illegal addresses.  Watch out for address wrap... Note
552 	 * that VM_*_ADDRESS are not constants due to casts (argh).
553 	 */
554 	if (VM_MAXUSER_ADDRESS > 0 &&
555 		((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
556 		return (EINVAL);
557 #ifndef i386
558 	if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
559 		return (EINVAL);
560 #endif
561 	if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
562 		return (EINVAL);
563 
564 	/*
565 	 * Since this routine is only advisory, we default to conservative
566 	 * behavior.
567 	 */
568 	start = trunc_page((vm_offset_t) uap->addr);
569 	end = round_page((vm_offset_t) uap->addr + uap->len);
570 
571 	map = &p->p_vmspace->vm_map;
572 	pmap = &p->p_vmspace->vm_pmap;
573 
574 	vm_map_madvise(map, pmap, start, end, uap->behav);
575 
576 	return (0);
577 }
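
/*
 * Usage sketch (illustrative only): hinting that a region will be
 * touched sequentially.  The call is purely advisory, so once the
 * address checks above pass it always succeeds.
 *
 *	(void) madvise(addr, len, MADV_SEQUENTIAL);
 */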
578 
579 #ifndef _SYS_SYSPROTO_H_
580 struct mincore_args {
581 	caddr_t addr;
582 	size_t len;
583 	char *vec;
584 };
585 #endif
586 
587 /* ARGSUSED */
588 int
589 mincore(p, uap, retval)
590 	struct proc *p;
591 	struct mincore_args *uap;
592 	int *retval;
593 {
594 	vm_offset_t addr, first_addr;
595 	vm_offset_t end, cend;
596 	pmap_t pmap;
597 	vm_map_t map;
598 	char *vec;
599 	int error;
600 	int vecindex, lastvecindex;
601 	register vm_map_entry_t current;
602 	vm_map_entry_t entry;
603 	int mincoreinfo;
604 
605 	/*
606 	 * Make sure that the addresses presented are valid for user
607 	 * mode.
608 	 */
609 	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
610 	end = addr + (vm_size_t)round_page(uap->len);
611 	if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
612 		return (EINVAL);
613 	if (end < addr)
614 		return (EINVAL);
615 
616 	/*
617 	 * Address of byte vector
618 	 */
619 	vec = uap->vec;
620 
621 	map = &p->p_vmspace->vm_map;
622 	pmap = &p->p_vmspace->vm_pmap;
623 
624 	vm_map_lock(map);
625 
626 	/*
627 	 * Not needed here
628 	 */
629 #if 0
630 	VM_MAP_RANGE_CHECK(map, addr, end);
631 #endif
632 
633 	if (!vm_map_lookup_entry(map, addr, &entry))
634 		entry = entry->next;
635 
636 	/*
637 	 * Do this on a map entry basis so that if the pages are not
638 	 * in the current process's address space, we can easily look
639 	 * up the pages elsewhere.
640 	 */
641 	lastvecindex = -1;
642 	for(current = entry;
643 		(current != &map->header) && (current->start < end);
644 		current = current->next) {
645 
646 		/*
647 		 * ignore submaps (for now) or null objects
648 		 */
649 		if (current->is_a_map || current->is_sub_map ||
650 			current->object.vm_object == NULL)
651 			continue;
652 
653 		/*
654 		 * limit this scan to the current map entry and the
655 		 * limits for the mincore call
656 		 */
657 		if (addr < current->start)
658 			addr = current->start;
659 		cend = current->end;
660 		if (cend > end)
661 			cend = end;
662 
663 		/*
664 		 * scan this entry one page at a time
665 		 */
666 		while(addr < cend) {
667 			/*
668 			 * Check the pmap first; it is likely faster.  It can
669 			 * also tell us whether we are the one referencing or
670 			 * modifying the page.
671 			 */
672 			mincoreinfo = pmap_mincore(pmap, addr);
673 			if (!mincoreinfo) {
674 				vm_pindex_t pindex;
675 				vm_ooffset_t offset;
676 				vm_page_t m;
677 				/*
678 				 * calculate the page index into the object
679 				 */
680 				offset = current->offset + (addr - current->start);
681 				pindex = OFF_TO_IDX(offset);
682 				m = vm_page_lookup(current->object.vm_object,
683 					pindex);
684 				/*
685 				 * if the page is resident, then gather information about
686 				 * it.
687 				 */
688 				if (m) {
689 					mincoreinfo = MINCORE_INCORE;
690 					if (m->dirty ||
691 						pmap_is_modified(VM_PAGE_TO_PHYS(m)))
692 						mincoreinfo |= MINCORE_MODIFIED_OTHER;
693 					if ((m->flags & PG_REFERENCED) ||
694 						pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
695 						mincoreinfo |= MINCORE_REFERENCED_OTHER;
696 				}
697 			}
698 
699 			/*
700 			 * calculate index into user supplied byte vector
701 			 */
702 			vecindex = OFF_TO_IDX(addr - first_addr);
703 
704 			/*
705 			 * If we have skipped map entries, we need to make sure that
706 			 * the byte vector is zeroed for those skipped entries.
707 			 */
708 			while((lastvecindex + 1) < vecindex) {
709 				++lastvecindex;
710 				error = subyte( vec + lastvecindex, 0);
711 				if (error) {
712 					vm_map_unlock(map);
713 					return (EFAULT);
714 				}
715 			}
716 
717 			/*
718 			 * Pass the page information to the user
719 			 */
720 			error = subyte( vec + vecindex, mincoreinfo);
721 			if (error) {
722 				vm_map_unlock(map);
723 				return (EFAULT);
724 			}
725 			lastvecindex = vecindex;
726 			addr += PAGE_SIZE;
727 		}
728 	}
729 
730 	/*
731 	 * Zero the last entries in the byte vector.
732 	 */
733 	vecindex = OFF_TO_IDX(end - first_addr);
734 	while((lastvecindex + 1) < vecindex) {
735 		++lastvecindex;
736 		error = subyte( vec + lastvecindex, 0);
737 		if (error) {
738 			vm_map_unlock(map);
739 			return (EFAULT);
740 		}
741 	}
742 
743 	vm_map_unlock(map);
744 	return (0);
745 }
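
/*
 * Usage sketch (illustrative only): the caller supplies one byte per
 * page and tests the MINCORE_* bits filled in by the loop above.
 * NPAGES and base are placeholders for the caller's own values.
 *
 *	char vec[NPAGES];
 *
 *	if (mincore(base, NPAGES * getpagesize(), vec) == 0 &&
 *	    (vec[0] & MINCORE_INCORE))
 *		printf("first page is resident\n");
 */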
746 
747 #ifndef _SYS_SYSPROTO_H_
748 struct mlock_args {
749 	caddr_t addr;
750 	size_t len;
751 };
752 #endif
753 int
754 mlock(p, uap, retval)
755 	struct proc *p;
756 	struct mlock_args *uap;
757 	int *retval;
758 {
759 	vm_offset_t addr;
760 	vm_size_t size, pageoff;
761 	int error;
762 
763 	addr = (vm_offset_t) uap->addr;
764 	size = uap->len;
765 
766 	pageoff = (addr & PAGE_MASK);
767 	addr -= pageoff;
768 	size += pageoff;
769 	size = (vm_size_t) round_page(size);
770 
771 	/* disable wrap around */
772 	if (addr + size < addr)
773 		return (EINVAL);
774 
775 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
776 		return (EAGAIN);
777 
778 #ifdef pmap_wired_count
779 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
780 	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
781 		return (EAGAIN);
782 #else
783 	error = suser(p->p_ucred, &p->p_acflag);
784 	if (error)
785 		return (error);
786 #endif
787 
788 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
789 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
790 }
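
/*
 * Usage sketch (illustrative only): wiring a buffer so that it cannot
 * be paged out.  The global wired-page check above can return EAGAIN
 * even to the super-user; without pmap_wired_count() the call is
 * restricted to the super-user entirely.
 *
 *	if (mlock(buf, buflen) == -1)
 *		warn("mlock");
 *
 * buf and buflen are placeholders for the caller's own values.
 */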
791 
792 #ifndef _SYS_SYSPROTO_H_
793 struct munlock_args {
794 	caddr_t addr;
795 	size_t len;
796 };
797 #endif
798 int
799 munlock(p, uap, retval)
800 	struct proc *p;
801 	struct munlock_args *uap;
802 	int *retval;
803 {
804 	vm_offset_t addr;
805 	vm_size_t size, pageoff;
806 	int error;
807 
808 	addr = (vm_offset_t) uap->addr;
809 	size = uap->len;
810 
811 	pageoff = (addr & PAGE_MASK);
812 	addr -= pageoff;
813 	size += pageoff;
814 	size = (vm_size_t) round_page(size);
815 
816 	/* disable wrap around */
817 	if (addr + size < addr)
818 		return (EINVAL);
819 
820 #ifndef pmap_wired_count
821 	error = suser(p->p_ucred, &p->p_acflag);
822 	if (error)
823 		return (error);
824 #endif
825 
826 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
827 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
828 }
829 
830 /*
831  * Internal version of mmap.
832  * Currently used by mmap, exec, and sys5 shared memory.
833  * Handle is either a vnode pointer or NULL for MAP_ANON.
834  */
835 int
836 vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
837 	register vm_map_t map;
838 	register vm_offset_t *addr;
839 	register vm_size_t size;
840 	vm_prot_t prot, maxprot;
841 	register int flags;
842 	caddr_t handle;		/* XXX should be vp */
843 	vm_ooffset_t foff;
844 {
845 	boolean_t fitit;
846 	vm_object_t object;
847 	struct vnode *vp = NULL;
848 	objtype_t type;
849 	int rv = KERN_SUCCESS;
850 	vm_ooffset_t objsize;
851 	int docow;
852 	struct proc *p = curproc;
853 
854 	if (size == 0)
855 		return (0);
856 
857 	objsize = size = round_page(size);
858 
859 	/*
860 	 * We currently can only deal with page-aligned file offsets.
861 	 * The check is here rather than in the syscall because the
862 	 * kernel calls this function internally for other mmapping
863 	 * operations (such as in exec) and non-aligned offsets will
864 	 * cause pmap inconsistencies...so we want to be sure to
865 	 * disallow this in all cases.
866 	 */
867 	if (foff & PAGE_MASK)
868 		return (EINVAL);
869 
870 	if ((flags & MAP_FIXED) == 0) {
871 		fitit = TRUE;
872 		*addr = round_page(*addr);
873 	} else {
874 		if (*addr != trunc_page(*addr))
875 			return (EINVAL);
876 		fitit = FALSE;
877 		(void) vm_map_remove(map, *addr, *addr + size);
878 	}
879 
880 	/*
881 	 * Lookup/allocate object.
882 	 */
883 	if (flags & MAP_ANON) {
884 		type = OBJT_DEFAULT;
885 		/*
886 		 * Unnamed anonymous regions always start at 0.
887 		 */
888 		if (handle == 0)
889 			foff = 0;
890 	} else {
891 		vp = (struct vnode *) handle;
892 		if (vp->v_type == VCHR) {
893 			type = OBJT_DEVICE;
894 			handle = (caddr_t) vp->v_rdev;
895 		} else {
896 			struct vattr vat;
897 			int error;
898 
899 			error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
900 			if (error)
901 				return (error);
902 			objsize = round_page(vat.va_size);
903 			type = OBJT_VNODE;
904 		}
905 	}
906 	object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff);
907 	if (object == NULL)
908 		return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
909 
910 	/*
911 	 * Force device mappings to be shared.
912 	 */
913 	if (type == OBJT_DEVICE) {
914 		flags &= ~(MAP_PRIVATE|MAP_COPY);
915 		flags |= MAP_SHARED;
916 	}
917 
918 	docow = 0;
919 	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
920 		docow = MAP_COPY_ON_WRITE | MAP_COPY_NEEDED;
921 	}
922 
923 	rv = vm_map_find(map, object, foff, addr, size, fitit,
924 			prot, maxprot, docow);
925 
926 
927 	if (rv != KERN_SUCCESS) {
928 		/*
929 		 * Lose the object reference.  This will destroy the object
930 		 * if it is an unnamed anonymous mapping, or a named
931 		 * anonymous mapping with no other references.
932 		 */
933 		vm_object_deallocate(object);
934 		goto out;
935 	}
936 
937 	/*
938 	 * "Pre-fault" resident pages.
939 	 */
940 	if ((type == OBJT_VNODE) && (map->pmap != NULL)) {
941 		pmap_object_init_pt(map->pmap, *addr,
942 			object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1);
943 	}
944 
945 	/*
946 	 * Shared memory is also shared with children.
947 	 */
948 	if (flags & (MAP_SHARED|MAP_INHERIT)) {
949 		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
950 		if (rv != KERN_SUCCESS) {
951 			(void) vm_map_remove(map, *addr, *addr + size);
952 			goto out;
953 		}
954 	}
955 out:
956 	switch (rv) {
957 	case KERN_SUCCESS:
958 		return (0);
959 	case KERN_INVALID_ADDRESS:
960 	case KERN_NO_SPACE:
961 		return (ENOMEM);
962 	case KERN_PROTECTION_FAILURE:
963 		return (EACCES);
964 	default:
965 		return (EINVAL);
966 	}
967 }
968