xref: /freebsd/sys/vm/vm_page.c (revision 05c7a37afb48ddd5ee1bd921a5d46fe59cc70b15)
1 /*
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
37  *	$Id: vm_page.c,v 1.49 1996/03/09 06:56:39 dyson Exp $
38  */
39 
40 /*
41  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42  * All rights reserved.
43  *
44  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
45  *
46  * Permission to use, copy, modify and distribute this software and
47  * its documentation is hereby granted, provided that both the copyright
48  * notice and this permission notice appear in all copies of the
49  * software, derivative works or modified versions, and any portions
50  * thereof, and that both notices appear in supporting documentation.
51  *
52  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
53  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
54  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
55  *
56  * Carnegie Mellon requests users of this software to return to
57  *
58  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
59  *  School of Computer Science
60  *  Carnegie Mellon University
61  *  Pittsburgh PA 15213-3890
62  *
63  * any improvements or extensions that they make and grant Carnegie the
64  * rights to redistribute these changes.
65  */
66 
67 /*
68  *	Resident memory management module.
69  */
70 #include "opt_ddb.h"
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/malloc.h>
75 #include <sys/proc.h>
76 #include <sys/queue.h>
77 #include <sys/vmmeter.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/vm_prot.h>
82 #include <vm/lock.h>
83 #include <vm/vm_kern.h>
84 #include <vm/vm_object.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_map.h>
87 #include <vm/vm_pageout.h>
88 #include <vm/vm_extern.h>
89 
90 #ifdef DDB
91 extern void	DDB_print_page_info __P((void));
92 #endif
93 
94 /*
95  *	Associated with each page of user-allocatable memory is a
96  *	page structure.
97  */
98 
99 static struct pglist *vm_page_buckets;	/* Array of buckets */
100 static int vm_page_bucket_count;	/* How big is array? */
101 static int vm_page_hash_mask;		/* Mask for hash function */
102 
103 struct pglist vm_page_queue_free;
104 struct pglist vm_page_queue_zero;
105 struct pglist vm_page_queue_active;
106 struct pglist vm_page_queue_inactive;
107 struct pglist vm_page_queue_cache;
108 
109 int no_queue;
110 
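/*
 * vm_page_queues[] maps each page queue index (PQ_NONE through PQ_CACHE)
 * to that queue's list head and to the counter in `cnt' that tracks its
 * length, so routines such as vm_page_unqueue() and vm_page_alloc() can
 * manipulate any queue by index.
 */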
111 struct {
112 	struct pglist *pl;
113 	int	*cnt;
114 } vm_page_queues[PQ_CACHE+1] = {
115 	{NULL, &no_queue},
116 	{ &vm_page_queue_free, &cnt.v_free_count},
117 	{ &vm_page_queue_zero, &cnt.v_free_count},
118 	{ &vm_page_queue_inactive, &cnt.v_inactive_count},
119 	{ &vm_page_queue_active, &cnt.v_active_count},
120 	{ &vm_page_queue_cache, &cnt.v_cache_count}
121 };
122 
123 vm_page_t vm_page_array;
124 static int vm_page_array_size;
125 long first_page;
126 static long last_page;
127 static vm_size_t page_mask;
128 static int page_shift;
129 int vm_page_zero_count;
130 
131 /*
132  * map of contiguous valid DEV_BSIZE chunks in a page
133  * (this list is valid for page sizes up to 16*DEV_BSIZE)
134  */
135 static u_short vm_page_dev_bsize_chunks[] = {
136 	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
137 	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
138 };
139 
140 static inline __pure int
141 		vm_page_hash __P((vm_object_t object, vm_pindex_t pindex))
142 		__pure2;
143 static void	vm_page_unqueue __P((vm_page_t ));
144 
145 /*
146  *	vm_set_page_size:
147  *
148  *	Sets the page size, perhaps based upon the memory
149  *	size.  Must be called before any use of page-size
150  *	dependent functions.
151  *
152  *	Sets page_shift and page_mask from cnt.v_page_size.
153  */
154 void
155 vm_set_page_size()
156 {
157 
158 	if (cnt.v_page_size == 0)
159 		cnt.v_page_size = DEFAULT_PAGE_SIZE;
160 	page_mask = cnt.v_page_size - 1;
161 	if ((page_mask & cnt.v_page_size) != 0)
162 		panic("vm_set_page_size: page size not a power of two");
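	/*
	 * Derive page_shift, the power of two for which
	 * (1 << page_shift) == cnt.v_page_size.
	 */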
163 	for (page_shift = 0;; page_shift++)
164 		if ((1 << page_shift) == cnt.v_page_size)
165 			break;
166 }
167 
168 /*
169  *	vm_page_startup:
170  *
171  *	Initializes the resident memory module.
172  *
173  *	Allocates memory for the page cells, and
174  *	for the object/offset-to-page hash table headers.
175  *	Each page cell is initialized and placed on the free list.
176  */
177 
178 vm_offset_t
179 vm_page_startup(starta, enda, vaddr)
180 	register vm_offset_t starta;
181 	vm_offset_t enda;
182 	register vm_offset_t vaddr;
183 {
184 	register vm_offset_t mapped;
185 	register vm_page_t m;
186 	register struct pglist *bucket;
187 	vm_size_t npages, page_range;
188 	register vm_offset_t new_start;
189 	int i;
190 	vm_offset_t pa;
191 	int nblocks;
192 	vm_offset_t first_managed_page;
193 
194 	/* the biggest memory array is the second group of pages */
195 	vm_offset_t start;
196 	vm_offset_t biggestone, biggestsize;
197 
198 	vm_offset_t total;
199 
200 	total = 0;
201 	biggestsize = 0;
202 	biggestone = 0;
203 	nblocks = 0;
204 	vaddr = round_page(vaddr);
205 
206 	for (i = 0; phys_avail[i + 1]; i += 2) {
207 		phys_avail[i] = round_page(phys_avail[i]);
208 		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
209 	}
210 
211 	for (i = 0; phys_avail[i + 1]; i += 2) {
212 		int size = phys_avail[i + 1] - phys_avail[i];
213 
214 		if (size > biggestsize) {
215 			biggestone = i;
216 			biggestsize = size;
217 		}
218 		++nblocks;
219 		total += size;
220 	}
221 
222 	start = phys_avail[biggestone];
223 
224 	/*
225 	 * Initialize the queue headers for the free queue, the active queue
226 	 * and the inactive queue.
227 	 */
228 
229 	TAILQ_INIT(&vm_page_queue_free);
230 	TAILQ_INIT(&vm_page_queue_zero);
231 	TAILQ_INIT(&vm_page_queue_active);
232 	TAILQ_INIT(&vm_page_queue_inactive);
233 	TAILQ_INIT(&vm_page_queue_cache);
234 
235 	/*
236 	 * Allocate (and initialize) the hash table buckets.
237 	 *
238 	 * The number of buckets MUST BE a power of 2, and the actual value is
239 	 * the next power of 2 greater than the number of physical pages in
240 	 * the system.
241 	 *
242 	 * Note: This computation can be tweaked if desired.
243 	 */
244 	vm_page_buckets = (struct pglist *) vaddr;
245 	bucket = vm_page_buckets;
246 	if (vm_page_bucket_count == 0) {
247 		vm_page_bucket_count = 2;
248 		while (vm_page_bucket_count < atop(total))
249 			vm_page_bucket_count <<= 1;
250 	}
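	/*
	 * Because the bucket count is a power of two, (count - 1) is an
	 * all-ones mask that vm_page_hash() can apply instead of a modulo.
	 */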
251 	vm_page_hash_mask = vm_page_bucket_count - 1;
252 
253 	/*
254 	 * Validate these addresses.
255 	 */
256 
257 	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
258 	new_start = round_page(new_start);
259 	mapped = vaddr;
260 	vaddr = pmap_map(mapped, start, new_start,
261 	    VM_PROT_READ | VM_PROT_WRITE);
262 	start = new_start;
263 	bzero((caddr_t) mapped, vaddr - mapped);
264 	mapped = vaddr;
265 
266 	for (i = 0; i < vm_page_bucket_count; i++) {
267 		TAILQ_INIT(bucket);
268 		bucket++;
269 	}
270 
271 	/*
272 	 * round (or truncate) the addresses to our page size.
273 	 */
274 
275 	/*
276 	 * Pre-allocate maps and map entries that cannot be dynamically
277 	 * allocated via malloc().  The maps include the kernel_map and
278 	 * kmem_map which must be initialized before malloc() will work
279 	 * (obviously).  Also could include pager maps which would be
280 	 * allocated before kmeminit.
281 	 *
282 	 * Allow some kernel map entries... this should be plenty since people
283 	 * shouldn't be cluttering up the kernel map (they should use their
284 	 * own maps).
285 	 */
286 
287 	kentry_data_size = MAX_KMAP * sizeof(struct vm_map) +
288 	    MAX_KMAPENT * sizeof(struct vm_map_entry);
289 	kentry_data_size = round_page(kentry_data_size);
290 	kentry_data = (vm_offset_t) vaddr;
291 	vaddr += kentry_data_size;
292 
293 	/*
294 	 * Validate these zone addresses.
295 	 */
296 
297 	new_start = start + (vaddr - mapped);
298 	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
299 	bzero((caddr_t) mapped, (vaddr - mapped));
300 	start = round_page(new_start);
301 
302 	/*
303 	 * Compute the number of pages of memory that will be available for
304 	 * use (taking into account the overhead of a page structure per
305 	 * page).
306 	 */
307 
308 	first_page = phys_avail[0] / PAGE_SIZE;
309 	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
310 
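	/*
	 * page_range spans every page frame from the first to the last
	 * managed page; npages is what remains of the available memory once
	 * the vm_page array and the space already carved out of the largest
	 * chunk (hash buckets and kentry data) are subtracted.
	 */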
311 	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
312 	npages = (total - (page_range * sizeof(struct vm_page)) -
313 	    (start - phys_avail[biggestone])) / PAGE_SIZE;
314 
315 	/*
316 	 * Initialize the mem entry structures now, and put them in the free
317 	 * queue.
318 	 */
319 
320 	vm_page_array = (vm_page_t) vaddr;
321 	mapped = vaddr;
322 
323 	/*
324 	 * Validate these addresses.
325 	 */
326 
327 	new_start = round_page(start + page_range * sizeof(struct vm_page));
328 	mapped = pmap_map(mapped, start, new_start,
329 	    VM_PROT_READ | VM_PROT_WRITE);
330 	start = new_start;
331 
332 	first_managed_page = start / PAGE_SIZE;
333 
334 	/*
335 	 * Clear all of the page structures
336 	 */
337 	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
338 	vm_page_array_size = page_range;
339 
340 	cnt.v_page_count = 0;
341 	cnt.v_free_count = 0;
342 	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
343 		if (i == biggestone)
344 			pa = ptoa(first_managed_page);
345 		else
346 			pa = phys_avail[i];
347 		while (pa < phys_avail[i + 1] && npages-- > 0) {
348 			++cnt.v_page_count;
349 			++cnt.v_free_count;
350 			m = PHYS_TO_VM_PAGE(pa);
351 			m->queue = PQ_FREE;
352 			m->flags = 0;
353 			m->phys_addr = pa;
354 			TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
355 			pa += PAGE_SIZE;
356 		}
357 	}
358 
359 	return (mapped);
360 }
361 
362 /*
363  *	vm_page_hash:
364  *
365  *	Distributes the object/offset key pair among hash buckets.
366  *
367  *	NOTE:  This function depends on vm_page_bucket_count being a power of 2.
368  */
369 static inline __pure int
370 vm_page_hash(object, pindex)
371 	vm_object_t object;
372 	vm_pindex_t pindex;
373 {
374 	return ((unsigned) object + pindex) & vm_page_hash_mask;
375 }
376 
377 /*
378  *	vm_page_insert:		[ internal use only ]
379  *
380  *	Inserts the given mem entry into the object/offset-page
381  *	table and object list.
382  *
383  *	The object and page must be locked, and must be at splhigh.
384  */
385 
386 inline void
387 vm_page_insert(m, object, pindex)
388 	register vm_page_t m;
389 	register vm_object_t object;
390 	register vm_pindex_t pindex;
391 {
392 	register struct pglist *bucket;
393 
394 	if (m->flags & PG_TABLED)
395 		panic("vm_page_insert: already inserted");
396 
397 	/*
398 	 * Record the object/offset pair in this page
399 	 */
400 
401 	m->object = object;
402 	m->pindex = pindex;
403 
404 	/*
405 	 * Insert it into the object/offset hash table
406 	 */
407 
408 	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
409 	TAILQ_INSERT_TAIL(bucket, m, hashq);
410 
411 	/*
412 	 * Now link into the object's list of backed pages.
413 	 */
414 
415 	TAILQ_INSERT_TAIL(&object->memq, m, listq);
416 	m->flags |= PG_TABLED;
417 
418 	/*
419 	 * And show that the object has one more resident page.
420 	 */
421 
422 	object->resident_page_count++;
423 }
424 
425 /*
426  *	vm_page_remove:		[ internal use only ]
427  *				NOTE: used by device pager as well -wfj
428  *
429  *	Removes the given mem entry from the object/offset-page
430  *	table and the object page list.
431  *
432  *	The object and page must be locked, and at splhigh.
433  */
434 
435 inline void
436 vm_page_remove(m)
437 	register vm_page_t m;
438 {
439 	register struct pglist *bucket;
440 
441 	if (!(m->flags & PG_TABLED))
442 		return;
443 
444 	/*
445 	 * Remove from the object/offset hash table
446 	 */
447 
448 	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
449 	TAILQ_REMOVE(bucket, m, hashq);
450 
451 	/*
452 	 * Now remove from the object's list of backed pages.
453 	 */
454 
455 	TAILQ_REMOVE(&m->object->memq, m, listq);
456 
457 	/*
458 	 * And show that the object has one fewer resident page.
459 	 */
460 
461 	m->object->resident_page_count--;
462 
463 	m->flags &= ~PG_TABLED;
464 }
465 
466 /*
467  *	vm_page_lookup:
468  *
469  *	Returns the page associated with the object/offset
470  *	pair specified; if none is found, NULL is returned.
471  *
472  *	The object must be locked.  No side effects.
473  */
474 
475 vm_page_t
476 vm_page_lookup(object, pindex)
477 	register vm_object_t object;
478 	register vm_pindex_t pindex;
479 {
480 	register vm_page_t m;
481 	register struct pglist *bucket;
482 	int s;
483 
484 	/*
485 	 * Search the hash table for this object/offset pair
486 	 */
487 
488 	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
489 
490 	s = splhigh();
491 	for (m = bucket->tqh_first; m != NULL; m = m->hashq.tqe_next) {
492 		if ((m->object == object) && (m->pindex == pindex)) {
493 			splx(s);
494 			return (m);
495 		}
496 	}
497 
498 	splx(s);
499 	return (NULL);
500 }
501 
502 /*
503  *	vm_page_rename:
504  *
505  *	Move the given memory entry from its
506  *	current object to the specified target object/offset.
507  *
508  *	The object must be locked.
509  */
510 void
511 vm_page_rename(m, new_object, new_pindex)
512 	register vm_page_t m;
513 	register vm_object_t new_object;
514 	vm_pindex_t new_pindex;
515 {
516 	int s;
517 
518 	s = splhigh();
519 	vm_page_remove(m);
520 	vm_page_insert(m, new_object, new_pindex);
521 	splx(s);
522 }
523 
524 /*
525  * vm_page_unqueue must be called at splhigh().
526  */
527 static inline void
528 vm_page_unqueue(vm_page_t m)
529 {
530 	int queue = m->queue;
531 	if (queue == PQ_NONE)
532 		return;
533 	m->queue = PQ_NONE;
534 	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
535 	--(*vm_page_queues[queue].cnt);
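	/*
	 * Removing a page from the cache queue shrinks the pool of
	 * immediately reclaimable memory, so wake the pagedaemon if the
	 * free + cache total has dropped below the free-reserve + cache-min
	 * threshold.
	 */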
536 	if (queue == PQ_CACHE) {
537 		if ((cnt.v_cache_count + cnt.v_free_count) <
538 			(cnt.v_free_reserved + cnt.v_cache_min))
539 			pagedaemon_wakeup();
540 	}
541 	return;
542 }
543 
544 /*
545  *	vm_page_alloc:
546  *
547  *	Allocate and return a memory cell associated
548  *	with this VM object/offset pair.
549  *
550  *	page_req classes:
551  *	VM_ALLOC_NORMAL		normal process request
552  *	VM_ALLOC_SYSTEM		system *really* needs a page
553  *	VM_ALLOC_INTERRUPT	interrupt time request
554  *	VM_ALLOC_ZERO		zero page
555  *
556  *	Object must be locked.
557  */
558 vm_page_t
559 vm_page_alloc(object, pindex, page_req)
560 	vm_object_t object;
561 	vm_pindex_t pindex;
562 	int page_req;
563 {
564 	register vm_page_t m;
565 	int queue;
566 	int s;
567 
568 #ifdef DIAGNOSTIC
569 	m = vm_page_lookup(object, pindex);
570 	if (m)
571 		panic("vm_page_alloc: page already allocated");
572 #endif
573 
574 	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
575 		page_req = VM_ALLOC_SYSTEM;
576 	}
577 
578 	s = splhigh();
579 
580 	switch (page_req) {
581 
582 	case VM_ALLOC_NORMAL:
583 		if (cnt.v_free_count >= cnt.v_free_reserved) {
584 			m = vm_page_queue_free.tqh_first;
585 			if (m == NULL) {
586 				--vm_page_zero_count;
587 				m = vm_page_queue_zero.tqh_first;
588 			}
589 		} else {
590 			m = vm_page_queue_cache.tqh_first;
591 			if (m == NULL) {
592 				splx(s);
593 				pagedaemon_wakeup();
594 				return (NULL);
595 			}
596 		}
597 		break;
598 
599 	case VM_ALLOC_ZERO:
600 		if (cnt.v_free_count >= cnt.v_free_reserved) {
601 			m = vm_page_queue_zero.tqh_first;
602 			if (m) {
603 				--vm_page_zero_count;
604 			} else {
605 				m = vm_page_queue_free.tqh_first;
606 			}
607 		} else {
608 			m = vm_page_queue_cache.tqh_first;
609 			if (m == NULL) {
610 				splx(s);
611 				pagedaemon_wakeup();
612 				return (NULL);
613 			}
614 		}
615 		break;
616 
617 	case VM_ALLOC_SYSTEM:
618 		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
619 		    ((cnt.v_cache_count == 0) &&
620 		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
621 				m = vm_page_queue_free.tqh_first;
622 				if (m == NULL) {
623 					--vm_page_zero_count;
624 					m = vm_page_queue_zero.tqh_first;
625 				}
626 		} else {
627 			m = vm_page_queue_cache.tqh_first;
628 			if (m == NULL) {
629 				splx(s);
630 				pagedaemon_wakeup();
631 				return (NULL);
632 			}
633 		}
634 		break;
635 
636 	case VM_ALLOC_INTERRUPT:
637 		if (cnt.v_free_count > 0) {
638 			m = vm_page_queue_free.tqh_first;
639 			if (m == NULL) {
640 				--vm_page_zero_count;
641 				m = vm_page_queue_zero.tqh_first;
642 			}
643 		} else {
644 			splx(s);
645 			pagedaemon_wakeup();
646 			return (NULL);
647 		}
648 		break;
649 
650 	default:
651 		panic("vm_page_alloc: invalid allocation class");
652 	}
653 
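	/*
	 * Take the selected page off whichever queue it came from.  A page
	 * from the zero queue keeps PG_ZERO so the caller may skip zeroing
	 * it; a page from the cache queue is still linked to its old object
	 * and must be removed from it first.
	 */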
654 	queue = m->queue;
655 	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
656 	--(*vm_page_queues[queue].cnt);
657 	if (queue == PQ_ZERO) {
658 		m->flags = PG_ZERO|PG_BUSY;
659 	} else if (queue == PQ_CACHE) {
660 		vm_page_remove(m);
661 		m->flags = PG_BUSY;
662 	} else {
663 		m->flags = PG_BUSY;
664 	}
665 	m->wire_count = 0;
666 	m->hold_count = 0;
667 	m->act_count = 0;
668 	m->busy = 0;
669 	m->valid = 0;
670 	m->dirty = 0;
671 	m->queue = PQ_NONE;
672 
673 	/* XXX before splx until vm_page_insert is safe */
674 	vm_page_insert(m, object, pindex);
675 
676 	splx(s);
677 
678 	/*
679 	 * Don't wake the pageout daemon too often - only when we would
680 	 * be nearly out of memory.
681 	 */
682 	if (((cnt.v_free_count + cnt.v_cache_count) <
683 		(cnt.v_free_reserved + cnt.v_cache_min)) ||
684 			(cnt.v_free_count < cnt.v_pageout_free_min))
685 		pagedaemon_wakeup();
686 
687 	return (m);
688 }
689 
690 /*
691  * This interface is for merging with malloc() someday.
692  * Even if we never implement compaction so that contiguous allocation
693  * works after initialization time, malloc()'s data structures are good
694  * for statistics and for allocations of less than a page.
695  */
696 void *
697 contigmalloc(size, type, flags, low, high, alignment, boundary)
698 	unsigned long size;	/* should be size_t here and for malloc() */
699 	int type;
700 	int flags;
701 	unsigned long low;
702 	unsigned long high;
703 	unsigned long alignment;
704 	unsigned long boundary;
705 {
706 	int i, s, start;
707 	vm_offset_t addr, phys, tmp_addr;
708 	vm_page_t pga = vm_page_array;
709 
710 	size = round_page(size);
711 	if (size == 0)
712 		panic("contigmalloc: size must not be 0");
713 	if ((alignment & (alignment - 1)) != 0)
714 		panic("contigmalloc: alignment must be a power of 2");
715 	if ((boundary & (boundary - 1)) != 0)
716 		panic("contigmalloc: boundary must be a power of 2");
717 
718 	start = 0;
719 	s = splhigh();
720 again:
721 	/*
722 	 * Find first page in array that is free, within range, aligned, and
723 	 * such that the boundary won't be crossed.
724 	 */
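	/*
	 * The boundary test XORs the first and last physical addresses of
	 * the candidate region and masks off the bits below the boundary;
	 * the result is zero only when both addresses fall within the same
	 * boundary-aligned block, i.e. the region does not cross a boundary.
	 */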
725 	for (i = start; i < cnt.v_page_count; i++) {
726 		phys = VM_PAGE_TO_PHYS(&pga[i]);
727 		if ((pga[i].queue == PQ_FREE) &&
728 		    (phys >= low) && (phys < high) &&
729 		    ((phys & (alignment - 1)) == 0) &&
730 		    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
731 			break;
732 	}
733 
734 	/*
735 	 * If the above failed or we will exceed the upper bound, fail.
736 	 */
737 	if ((i == cnt.v_page_count) ||
738 		((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
739 		splx(s);
740 		return (NULL);
741 	}
742 	start = i;
743 
744 	/*
745 	 * Check successive pages for contiguous and free.
746 	 */
747 	for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
748 		if ((VM_PAGE_TO_PHYS(&pga[i]) !=
749 		    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
750 		    (pga[i].queue != PQ_FREE)) {
751 			start++;
752 			goto again;
753 		}
754 	}
755 
756 	/*
757 	 * We've found a contiguous chunk that meets our requirements.
758 	 * Allocate kernel VM, take the pages off the free list and assign
759 	 * them to the new kernel VM range, and return its address.
760 	 */
761 	tmp_addr = addr = kmem_alloc_pageable(kernel_map, size);
762 	if (addr == 0) {
763 		splx(s);
764 		return (NULL);
765 	}
766 
767 	for (i = start; i < (start + size / PAGE_SIZE); i++) {
768 		vm_page_t m = &pga[i];
769 
770 		TAILQ_REMOVE(&vm_page_queue_free, m, pageq);
771 		cnt.v_free_count--;
772 		m->valid = VM_PAGE_BITS_ALL;
773 		m->flags = 0;
774 		m->dirty = 0;
775 		m->wire_count = 0;
776 		m->act_count = 0;
777 		m->busy = 0;
778 		m->queue = PQ_NONE;
779 		vm_page_insert(m, kernel_object,
780 			OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
781 		vm_page_wire(m);
782 		pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
783 		tmp_addr += PAGE_SIZE;
784 	}
785 
786 	splx(s);
787 	return ((void *)addr);
788 }
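
/*
 * Usage sketch (illustrative values only): a driver needing a buffer
 * below 16 MB, page-aligned, and not crossing a 64 KB boundary might
 * call
 *
 *	buf = contigmalloc(8192, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0xfffffful, PAGE_SIZE, 0x10000ul);
 *
 * vm_page_alloc_contig() below shows the historical convention of
 * passing no boundary restriction (0).
 */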
789 
790 vm_offset_t
791 vm_page_alloc_contig(size, low, high, alignment)
792 	vm_offset_t size;
793 	vm_offset_t low;
794 	vm_offset_t high;
795 	vm_offset_t alignment;
796 {
797 	return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high,
798 					  alignment, 0ul));
799 }
800 
801 /*
802  *	vm_page_free:
803  *
804  *	Returns the given page to the free list,
805  *	disassociating it with any VM object.
806  *
807  *	Object and page must be locked prior to entry.
808  */
809 void
810 vm_page_free(m)
811 	register vm_page_t m;
812 {
813 	int s;
814 	int flags = m->flags;
815 
816 	s = splhigh();
817 	if (m->busy || (flags & PG_BUSY) || (m->queue == PQ_FREE)) {
818 		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d)\n",
819 		    m->pindex, m->busy, (flags & PG_BUSY) ? 1 : 0);
820 		if (m->queue == PQ_FREE)
821 			panic("vm_page_free: freeing free page");
822 		else
823 			panic("vm_page_free: freeing busy page");
824 	}
825 
826 	if (m->hold_count) {
827 		panic("vm_page_free: freeing held page, count=%d", m->hold_count);
828 	}
829 
830 	vm_page_remove(m);
831 	vm_page_unqueue(m);
832 
833 	if ((flags & PG_FICTITIOUS) == 0) {
834 		if (m->wire_count) {
835 			if (m->wire_count > 1) {
836 				printf("vm_page_free: wire count > 1 (%d)\n", m->wire_count);
837 				panic("vm_page_free: invalid wire count");
838 			}
839 			cnt.v_wire_count--;
840 			m->wire_count = 0;
841 		}
842 		m->queue = PQ_FREE;
843 		TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
844 		splx(s);
845 		/*
846 		 * if pageout daemon needs pages, then tell it that there are
847 		 * some free.
848 		 */
849 		if (vm_pageout_pages_needed) {
850 			wakeup(&vm_pageout_pages_needed);
851 			vm_pageout_pages_needed = 0;
852 		}
853 
854 		cnt.v_free_count++;
855 		/*
856 		 * Wake up processes that are waiting on memory if we hit a
857 		 * high water mark, and wake up the scheduler process if we
858 		 * have lots of memory; it will swap processes back in.
859 		 */
860 		if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
861 			wakeup(&cnt.v_free_count);
862 			wakeup(&proc0);
863 		}
864 	} else {
865 		splx(s);
866 	}
867 	cnt.v_tfree++;
868 }
869 
870 
871 /*
872  *	vm_page_wire:
873  *
874  *	Mark this page as wired down by yet
875  *	another map, removing it from paging queues
876  *	as necessary.
877  *
878  *	The page queues must be locked.
879  */
880 void
881 vm_page_wire(m)
882 	register vm_page_t m;
883 {
884 	int s;
885 
886 	if (m->wire_count == 0) {
887 		s = splhigh();
888 		vm_page_unqueue(m);
889 		splx(s);
890 		cnt.v_wire_count++;
891 	}
892 	m->wire_count++;
893 	m->flags |= PG_MAPPED;
894 }
895 
896 /*
897  *	vm_page_unwire:
898  *
899  *	Release one wiring of this page, potentially
900  *	enabling it to be paged again.
901  *
902  *	The page queues must be locked.
903  */
904 void
905 vm_page_unwire(m)
906 	register vm_page_t m;
907 {
908 	int s;
909 
910 	s = splhigh();
911 
912 	if (m->wire_count > 0)
913 		m->wire_count--;
914 
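	/*
	 * Last wiring released: the page becomes pageable again, so put it
	 * back on the active queue.
	 */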
915 	if (m->wire_count == 0) {
916 		cnt.v_wire_count--;
917 		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
918 		m->queue = PQ_ACTIVE;
919 		if (m->act_count < ACT_MAX)
920 			m->act_count += 1;
921 		cnt.v_active_count++;
922 	}
923 	splx(s);
924 }
925 
926 /*
927  *	vm_page_activate:
928  *
929  *	Put the specified page on the active list (if appropriate).
930  *
931  *	The page queues must be locked.
932  */
933 void
934 vm_page_activate(m)
935 	register vm_page_t m;
936 {
937 	int s;
938 
939 	s = splhigh();
940 	if (m->queue == PQ_ACTIVE)
941 		panic("vm_page_activate: already active");
942 
943 	if (m->queue == PQ_CACHE)
944 		cnt.v_reactivated++;
945 
946 	vm_page_unqueue(m);
947 
948 	if (m->wire_count == 0) {
949 		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
950 		m->queue = PQ_ACTIVE;
951 		if (m->act_count < 5)
952 			m->act_count = 5;
953 		else if (m->act_count < ACT_MAX)
954 			m->act_count += 1;
955 		cnt.v_active_count++;
956 	}
957 	splx(s);
958 }
959 
960 /*
961  *	vm_page_deactivate:
962  *
963  *	Returns the given page to the inactive list,
964  *	indicating that no physical maps have access
965  *	to this page.  [Used by the physical mapping system.]
966  *
967  *	The page queues must be locked.
968  */
969 void
970 vm_page_deactivate(m)
971 	register vm_page_t m;
972 {
973 	int spl;
974 
975 	/*
976 	 * Only move active pages -- ignore locked or already inactive ones.
977 	 *
978 	 * XXX: sometimes we get pages which aren't wired down or on any queue -
979 	 * we need to put them on the inactive queue also, otherwise we lose
980 	 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
981 	 */
982 	if (m->queue == PQ_INACTIVE)
983 		return;
984 
985 	spl = splhigh();
986 	if (m->wire_count == 0 && m->hold_count == 0) {
987 		if (m->queue == PQ_CACHE)
988 			cnt.v_reactivated++;
989 		vm_page_unqueue(m);
990 		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
991 		m->queue = PQ_INACTIVE;
992 		cnt.v_inactive_count++;
993 		m->act_count = 0;
994 	}
995 	splx(spl);
996 }
997 
998 /*
999  *	vm_page_cache:
1000  *
1001  *	Put the specified page onto the page cache queue (if appropriate).
1002  */
1003 void
1004 vm_page_cache(m)
1005 	register vm_page_t m;
1006 {
1007 	int s;
1008 
1009 	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
1010 		printf("vm_page_cache: attempting to cache busy page\n");
1011 		return;
1012 	}
1013 	if (m->queue == PQ_CACHE)
1014 		return;
1015 
1016 	vm_page_protect(m, VM_PROT_NONE);
1017 	s = splhigh();
1018 	vm_page_unqueue(m);
1019 	TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq);
1020 	m->queue = PQ_CACHE;
1021 	cnt.v_cache_count++;
1022 	if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
1023 		wakeup(&cnt.v_free_count);
1024 		wakeup(&proc0);
1025 	}
1026 	if (vm_pageout_pages_needed) {
1027 		wakeup(&vm_pageout_pages_needed);
1028 		vm_pageout_pages_needed = 0;
1029 	}
1030 	splx(s);
1031 }
1032 
1033 /*
1034  *	vm_page_zero_fill:
1035  *
1036  *	Zero-fill the specified page.
1037  *	Written as a standard pagein routine, to
1038  *	be used by the zero-fill object.
1039  */
1040 boolean_t
1041 vm_page_zero_fill(m)
1042 	vm_page_t m;
1043 {
1044 	pmap_zero_page(VM_PAGE_TO_PHYS(m));
1045 	return (TRUE);
1046 }
1047 
1048 /*
1049  *	vm_page_copy:
1050  *
1051  *	Copy one page to another
1052  */
1053 void
1054 vm_page_copy(src_m, dest_m)
1055 	vm_page_t src_m;
1056 	vm_page_t dest_m;
1057 {
1058 	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
1059 	dest_m->valid = VM_PAGE_BITS_ALL;
1060 }
1061 
1062 
1063 /*
1064  * Map a byte range (base, size) within a page to the corresponding
1065  * mask of valid/dirty bits, one bit per DEV_BSIZE chunk.
1066  */
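/*
 * For example, with DEV_BSIZE == 512 and a 4096-byte page,
 * vm_page_bits(1024, 1024) covers DEV_BSIZE chunks 2 and 3:
 * vm_page_dev_bsize_chunks[1024 / 512] == 0x3, shifted left by
 * (1024 % 4096) / 512 == 2, yields the mask 0xc.
 */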
1067 inline int
1068 vm_page_bits(int base, int size)
1069 {
1070 	u_short chunk;
1071 
1072 	if ((base == 0) && (size >= PAGE_SIZE))
1073 		return VM_PAGE_BITS_ALL;
1074 	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
1075 	base = (base % PAGE_SIZE) / DEV_BSIZE;
1076 	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
1077 	return (chunk << base) & VM_PAGE_BITS_ALL;
1078 }
1079 
1080 /*
1081  * set a page valid and clean
1082  */
1083 void
1084 vm_page_set_validclean(m, base, size)
1085 	vm_page_t m;
1086 	int base;
1087 	int size;
1088 {
1089 	int pagebits = vm_page_bits(base, size);
1090 	m->valid |= pagebits;
1091 	m->dirty &= ~pagebits;
1092 	if (base == 0 && size == PAGE_SIZE)
1093 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
1094 }
1095 
1096 /*
1097  * set a page (partially) invalid
1098  */
1099 void
1100 vm_page_set_invalid(m, base, size)
1101 	vm_page_t m;
1102 	int base;
1103 	int size;
1104 {
1105 	int bits;
1106 
1107 	m->valid &= ~(bits = vm_page_bits(base, size));
1108 	if (m->valid == 0)
1109 		m->dirty &= ~bits;
1110 }
1111 
1112 /*
1113  * is (partial) page valid?
1114  */
1115 int
1116 vm_page_is_valid(m, base, size)
1117 	vm_page_t m;
1118 	int base;
1119 	int size;
1120 {
1121 	int bits = vm_page_bits(base, size);
1122 
1123 	if (m->valid && ((m->valid & bits) == bits))
1124 		return 1;
1125 	else
1126 		return 0;
1127 }
1128 
1129 
1130 
1131 void
1132 vm_page_test_dirty(m)
1133 	vm_page_t m;
1134 {
1135 	if ((m->dirty != VM_PAGE_BITS_ALL) &&
1136 	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
1137 		m->dirty = VM_PAGE_BITS_ALL;
1138 	}
1139 }
1140 
1141 #ifdef DDB
1142 void
1143 DDB_print_page_info(void)
1144 {
1145 	printf("cnt.v_free_count: %d\n", cnt.v_free_count);
1146 	printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
1147 	printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
1148 	printf("cnt.v_active_count: %d\n", cnt.v_active_count);
1149 	printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
1150 	printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
1151 	printf("cnt.v_free_min: %d\n", cnt.v_free_min);
1152 	printf("cnt.v_free_target: %d\n", cnt.v_free_target);
1153 	printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
1154 	printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
1155 }
1156 #endif
1157