xref: /freebsd/sys/powerpc/aim/mmu_oea.c (revision 5521ff5a4d1929056e7ffc982fac3341ca54df7c)
1 /*
2  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
3  * Copyright (C) 1995, 1996 TooLs GmbH.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed by TooLs GmbH.
17  * 4. The name of TooLs GmbH may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $
32  */
33 /*
34  * Copyright (C) 2001 Benno Rice.
35  * All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  *
46  * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
47  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
48  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
49  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
51  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
52  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
53  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
54  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
55  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
56  */
57 
58 #ifndef lint
59 static const char rcsid[] =
60   "$FreeBSD$";
61 #endif /* not lint */
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/proc.h>
66 #include <sys/malloc.h>
67 #include <sys/msgbuf.h>
68 #include <sys/vmmeter.h>
69 #include <sys/mman.h>
70 #include <sys/queue.h>
71 #include <sys/mutex.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_param.h>
75 #include <sys/lock.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_page.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_object.h>
80 #include <vm/vm_extern.h>
81 #include <vm/vm_pageout.h>
82 #include <vm/vm_pager.h>
83 #include <vm/vm_zone.h>
84 
85 #include <sys/user.h>
86 
87 #include <machine/pcb.h>
88 #include <machine/powerpc.h>
89 #include <machine/pte.h>
90 
91 pte_t	*ptable;
92 int	ptab_cnt;
93 u_int	ptab_mask;
94 #define	HTABSIZE	(ptab_cnt * 64)
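/*
 * Each PTE group (PTEG) consists of eight 8-byte PTEs, hence the
 * 64 bytes per group in HTABSIZE above.
 */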
95 
96 #define	MINPV		2048
97 
98 struct pte_ovfl {
99 	LIST_ENTRY(pte_ovfl) po_list;	/* Linked list of overflow entries */
100 	struct pte	po_pte;		/* PTE for this mapping */
101 };
102 
103 LIST_HEAD(pte_ovtab, pte_ovfl) *potable; /* Overflow entries for ptable */
104 
105 static struct pmap	kernel_pmap_store;
106 pmap_t			kernel_pmap;
107 
108 static int	npgs;
109 static u_int	nextavail;
110 
111 #ifndef MSGBUFADDR
112 extern vm_offset_t	msgbuf_paddr;
113 #endif
114 
115 static struct mem_region	*mem, *avail;
116 
117 vm_offset_t	avail_start;
118 vm_offset_t	avail_end;
119 vm_offset_t	virtual_avail;
120 vm_offset_t	virtual_end;
121 
122 vm_offset_t	kernel_vm_end;
123 
124 static int	pmap_pagedaemon_waken = 0;
125 
126 extern unsigned int	Maxmem;
127 
128 #define	ATTRSHFT	4
129 
130 struct pv_entry	*pv_table;
131 
132 static vm_zone_t	pvzone;
133 static struct vm_zone	pvzone_store;
134 static struct vm_object	pvzone_obj;
135 static int		pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
136 static struct pv_entry	*pvinit;
137 
138 #if !defined(PMAP_SHPGPERPROC)
139 #define	PMAP_SHPGPERPROC	200
140 #endif
141 
142 struct pv_page;
143 struct pv_page_info {
144 	LIST_ENTRY(pv_page) pgi_list;
145 	struct pv_entry	*pgi_freelist;
146 	int		pgi_nfree;
147 };
148 #define	NPVPPG	((PAGE_SIZE - sizeof(struct pv_page_info)) / sizeof(struct pv_entry))
149 struct pv_page {
150 	struct pv_page_info	pvp_pgi;
151 	struct pv_entry		pvp_pv[NPVPPG];
152 };
153 LIST_HEAD(pv_page_list, pv_page) pv_page_freelist;
154 int	pv_nfree;
155 int	pv_pcnt;
156 static struct pv_entry	*pmap_alloc_pv(void);
157 static void		pmap_free_pv(struct pv_entry *);
158 
159 struct po_page;
160 struct po_page_info {
161 	LIST_ENTRY(po_page) pgi_list;
162 	vm_page_t	pgi_page;
163 	LIST_HEAD(po_freelist, pte_ovfl) pgi_freelist;
164 	int		pgi_nfree;
165 };
166 #define	NPOPPG	((PAGE_SIZE - sizeof(struct po_page_info)) / sizeof(struct pte_ovfl))
167 struct po_page {
168 	struct po_page_info	pop_pgi;
169 	struct pte_ovfl		pop_po[NPOPPG];
170 };
171 LIST_HEAD(po_page_list, po_page) po_page_freelist;
172 int	po_nfree;
173 int	po_pcnt;
174 static struct pte_ovfl	*poalloc(void);
175 static void		pofree(struct pte_ovfl *, int);
176 
177 static u_int	usedsr[NPMAPS / sizeof(u_int) / 8];
178 
179 static int	pmap_initialized;
180 
181 int	pte_spill(vm_offset_t);
182 
183 /*
184  * These small routines may have to be replaced,
185  * if/when we support processors other than the 604.
186  */
187 static __inline void
188 tlbie(vm_offset_t ea)
189 {
190 
191 	__asm __volatile ("tlbie %0" :: "r"(ea));
192 }
193 
194 static __inline void
195 tlbsync(void)
196 {
197 
198 	__asm __volatile ("sync; tlbsync; sync");
199 }
200 
201 static __inline void
202 tlbia(void)
203 {
204 	vm_offset_t	i;
205 
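	/*
	 * Issue a tlbie for each of 64 consecutive pages
	 * (0x00040000 / 0x00001000); on the 604-class CPUs this file
	 * currently targets this is expected to cover every TLB
	 * congruence class.
	 */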
206 	__asm __volatile ("sync");
207 	for (i = 0; i < (vm_offset_t)0x00040000; i += 0x00001000) {
208 		tlbie(i);
209 	}
210 	tlbsync();
211 }
212 
213 static __inline int
214 ptesr(sr_t *sr, vm_offset_t addr)
215 {
216 
217 	return sr[(u_int)addr >> ADDR_SR_SHFT];
218 }
219 
220 static __inline int
221 pteidx(sr_t sr, vm_offset_t addr)
222 {
223 	int	hash;
224 
225 	hash = (sr & SR_VSID) ^ (((u_int)addr & ADDR_PIDX) >> ADDR_PIDX_SHFT);
226 	return hash & ptab_mask;
227 }
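
/*
 * Illustrative sketch, not part of the original file: how a PTEG index
 * from pteidx() maps onto ptable.  Each group holds 8 PTEs, and the
 * secondary group is found by XORing the index with ptab_mask, which is
 * what pte_insert() and pte_spill() below rely on.  The helper names are
 * hypothetical.
 */
#if 0
static __inline pte_t *
pteg_primary(int idx)
{

	return ptable + idx * 8;
}

static __inline pte_t *
pteg_secondary(int idx)
{

	return ptable + (idx ^ ptab_mask) * 8;
}
#endif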
228 
229 static __inline int
230 ptematch(pte_t *ptp, sr_t sr, vm_offset_t va, int which)
231 {
232 
233 	return ptp->pte_hi == (((sr & SR_VSID) << PTE_VSID_SHFT) |
234 	    (((u_int)va >> ADDR_API_SHFT) & PTE_API) | which);
235 }
236 
237 static __inline struct pv_entry *
238 pa_to_pv(vm_offset_t pa)
239 {
240 #if 0 /* XXX */
241 	int	bank, pg;
242 
243 	bank = vm_physseg_find(atop(pa), &pg);
244 	if (bank == -1)
245 		return NULL;
246 	return &vm_physmem[bank].pmseg.pvent[pg];
247 #endif
248 	return (NULL);
249 }
250 
251 static __inline char *
252 pa_to_attr(vm_offset_t pa)
253 {
254 #if 0 /* XXX */
255 	int	bank, pg;
256 
257 	bank = vm_physseg_find(atop(pa), &pg);
258 	if (bank == -1)
259 		return NULL;
260 	return &vm_physmem[bank].pmseg.attrs[pg];
261 #endif
262 	return (NULL);
263 }
264 
265 /*
266  * Try to insert page table entry *pt into the ptable at idx.
267  *
268  * Note: *pt mustn't have PTE_VALID set.
269  * This is done here as required by Book III, 4.12.
270  */
271 static int
272 pte_insert(int idx, pte_t *pt)
273 {
274 	pte_t	*ptp;
275 	int	i;
276 
277 	/*
278 	 * First try primary hash.
279 	 */
280 	for (ptp = ptable + idx * 8, i = 8; --i >= 0; ptp++) {
281 		if (!(ptp->pte_hi & PTE_VALID)) {
282 			*ptp = *pt;
283 			ptp->pte_hi &= ~PTE_HID;
284 			__asm __volatile ("sync");
285 			ptp->pte_hi |= PTE_VALID;
286 			return 1;
287 		}
288 	}
289 
290 	/*
291 	 * Then try secondary hash.
292 	 */
293 
294 	idx ^= ptab_mask;
295 
296 	for (ptp = ptable + idx * 8, i = 8; --i >= 0; ptp++) {
297 		if (!(ptp->pte_hi & PTE_VALID)) {
298 			*ptp = *pt;
299 			ptp->pte_hi |= PTE_HID;
300 			__asm __volatile ("sync");
301 			ptp->pte_hi |= PTE_VALID;
302 			return 1;
303 		}
304 	}
305 
306 	return 0;
307 }
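
/*
 * Illustrative sketch, not part of the original file: the calling pattern
 * used by pmap_enter() and pmap_new_proc() below.  The PTE is built with
 * PTE_VALID clear, offered to pte_insert(), and pushed onto the overflow
 * list for this index if both hash groups are full.  The function name and
 * the choice of WIMG/protection bits here are hypothetical.
 */
#if 0
static void
pte_enter_example(struct pmap *pm, vm_offset_t va, vm_offset_t pa)
{
	sr_t		sr;
	int		idx;
	pte_t		pte;
	struct pte_ovfl	*po;

	idx = pteidx(sr = ptesr(pm->pm_sr, va), va);
	pte.pte_hi = ((sr & SR_VSID) << PTE_VSID_SHFT)
	    | ((va & ADDR_PIDX) >> ADDR_API_SHFT);
	pte.pte_lo = (pa & PTE_RPGN) | PTE_M | PTE_RW;
	if (!pte_insert(idx, &pte)) {
		po = poalloc();
		po->po_pte = pte;
		LIST_INSERT_HEAD(potable + idx, po, po_list);
	}
}
#endif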
308 
309 /*
310  * Spill handler.
311  *
312  * Tries to spill a page table entry from the overflow area.
313  * Note that this routine runs in real mode on a separate stack,
314  * with interrupts disabled.
315  */
316 int
317 pte_spill(vm_offset_t addr)
318 {
319 	int		idx, i;
320 	sr_t		sr;
321 	struct pte_ovfl	*po;
322 	pte_t		ps;
323 	pte_t		*pt;
324 
325 	__asm ("mfsrin %0,%1" : "=r"(sr) : "r"(addr));
326 	idx = pteidx(sr, addr);
327 	for (po = potable[idx].lh_first; po; po = po->po_list.le_next) {
328 		if (ptematch(&po->po_pte, sr, addr, 0)) {
329 			/*
330 			 * Now found an entry to be spilled into the real
331 			 * ptable.
332 			 */
333 			if (pte_insert(idx, &po->po_pte)) {
334 				LIST_REMOVE(po, po_list);
335 				pofree(po, 0);
336 				return 1;
337 			}
338 			/*
339 			 * Have to substitute some entry. Use the primary
340 			 * hash for this.
341 			 *
342 			 * Use low bits of timebase as random generator
343 			 */
344 			__asm ("mftb %0" : "=r"(i));
345 			pt = ptable + idx * 8 + (i & 7);
346 			pt->pte_hi &= ~PTE_VALID;
347 			ps = *pt;
348 			__asm __volatile ("sync");
349 			tlbie(addr);
350 			tlbsync();
351 			*pt = po->po_pte;
352 			__asm __volatile ("sync");
353 			pt->pte_hi |= PTE_VALID;
354 			po->po_pte = ps;
355 			if (ps.pte_hi & PTE_HID) {
356 				/*
357 				 * We took an entry that was on the alternate
358 				 * hash chain, so move it to its original
359 				 * chain.
360 				 */
361 				po->po_pte.pte_hi &= ~PTE_HID;
362 				LIST_REMOVE(po, po_list);
363 				LIST_INSERT_HEAD(potable + (idx ^ ptab_mask),
364 						 po, po_list);
365 			}
366 			return 1;
367 		}
368 	}
369 
370 	return 0;
371 }
372 
373 /*
374  * This is called during powerpc_init, before the system is really initialized.
375  */
376 void
377 pmap_bootstrap(u_int kernelstart, u_int kernelend)
378 {
379 	struct mem_region	*mp, *mp1;
380 	int			cnt, i;
381 	u_int			s, e, sz;
382 
383 	/*
384 	 * Get memory.
385 	 */
386 	mem_regions(&mem, &avail);
387 	for (mp = mem; mp->size; mp++)
388 		Maxmem += btoc(mp->size);
389 
390 	/*
391 	 * Count the number of available entries.
392 	 */
393 	for (cnt = 0, mp = avail; mp->size; mp++) {
394 		cnt++;
395 	}
396 
397 	/*
398 	 * Page align all regions.
399 	 * Non-page aligned memory isn't very interesting to us.
400 	 * Also, sort the entries for ascending addresses.
401 	 */
402 	kernelstart &= ~PAGE_MASK;
403 	kernelend = (kernelend + PAGE_MASK) & ~PAGE_MASK;
404 	for (mp = avail; mp->size; mp++) {
405 		s = mp->start;
406 		e = mp->start + mp->size;
407 		/*
408 		 * Check whether this region holds all of the kernel.
409 		 */
410 		if (s < kernelstart && e > kernelend) {
411 			avail[cnt].start = kernelend;
412 			avail[cnt++].size = e - kernelend;
413 			e = kernelstart;
414 		}
415 		/*
416 		 * Look whether this region starts within the kernel.
417 		 */
418 		if (s >= kernelstart && s < kernelend) {
419 			if (e <= kernelend)
420 				goto empty;
421 			s = kernelend;
422 		}
423 		/*
424 		 * Now look whether this region ends within the kernel.
425 		 */
426 		if (e > kernelstart && e <= kernelend) {
427 			if (s >= kernelstart)
428 				goto empty;
429 			e = kernelstart;
430 		}
431 		/*
432 		 * Now page align the start and size of the region.
433 		 */
434 		s = round_page(s);
435 		e = trunc_page(e);
436 		if (e < s) {
437 			e = s;
438 		}
439 		sz = e - s;
440 		/*
441 		 * Check whether some memory is left here.
442 		 */
443 		if (sz == 0) {
444 		empty:
445 			bcopy(mp + 1, mp,
446 			      (cnt - (mp - avail)) * sizeof *mp);
447 			cnt--;
448 			mp--;
449 			continue;
450 		}
451 
452 		/*
453 		 * Do an insertion sort.
454 		 */
455 		npgs += btoc(sz);
456 
457 		for (mp1 = avail; mp1 < mp; mp1++) {
458 			if (s < mp1->start) {
459 				break;
460 			}
461 		}
462 
463 		if (mp1 < mp) {
464 			bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1);
465 			mp1->start = s;
466 			mp1->size = sz;
467 		} else {
468 			mp->start = s;
469 			mp->size = sz;
470 		}
471 	}
472 
473 #ifdef HTABENTS
474 	ptab_cnt = HTABENTS;
475 #else
476 	ptab_cnt = (Maxmem + 1) / 2;
477 
478 	/* The minimum is 1024 PTEGs. */
479 	if (ptab_cnt < 1024) {
480 		ptab_cnt = 1024;
481 	}
482 
483 	/* Round up to power of 2. */
484 	__asm ("cntlzw %0,%1" : "=r"(i) : "r"(ptab_cnt - 1));
485 	ptab_cnt = 1 << (32 - i);
486 #endif
487 
488 	/*
489 	 * Find suitably aligned memory for HTAB.
490 	 */
491 	for (mp = avail; mp->size; mp++) {
492 		s = roundup(mp->start, HTABSIZE) - mp->start;
493 
494 		if (mp->size < s + HTABSIZE) {
495 			continue;
496 		}
497 
498 		ptable = (pte_t *)(mp->start + s);
499 
500 		if (mp->size == s + HTABSIZE) {
501 			if (s)
502 				mp->size = s;
503 			else {
504 				bcopy(mp + 1, mp,
505 				      (cnt - (mp - avail)) * sizeof *mp);
506 				mp = avail;
507 			}
508 			break;
509 		}
510 
511 		if (s != 0) {
512 			bcopy(mp, mp + 1,
513 			      (cnt - (mp - avail)) * sizeof *mp);
514 			mp++->size = s;
515 			cnt++;
516 		}
517 
518 		mp->start += s + HTABSIZE;
519 		mp->size -= s + HTABSIZE;
520 		break;
521 	}
522 
523 	if (!mp->size) {
524 		panic("not enough memory?");
525 	}
526 
527 	npgs -= btoc(HTABSIZE);
528 	bzero((void *)ptable, HTABSIZE);
529 	ptab_mask = ptab_cnt - 1;
530 
531 	/*
532 	 * We cannot do pmap_steal_memory here,
533 	 * since we don't run with translation enabled yet.
534 	 */
535 	s = sizeof(struct pte_ovtab) * ptab_cnt;
536 	sz = round_page(s);
537 
538 	for (mp = avail; mp->size; mp++) {
539 		if (mp->size >= sz) {
540 			break;
541 		}
542 	}
543 
544 	if (!mp->size) {
545 		panic("not enough memory?");
546 	}
547 
548 	npgs -= btoc(sz);
549 	potable = (struct pte_ovtab *)mp->start;
550 	mp->size -= sz;
551 	mp->start += sz;
552 
553 	if (mp->size <= 0) {
554 		bcopy(mp + 1, mp, (cnt - (mp - avail)) * sizeof *mp);
555 	}
556 
557 	for (i = 0; i < ptab_cnt; i++) {
558 		LIST_INIT(potable + i);
559 	}
560 
561 #ifndef MSGBUFADDR
562 	/*
563 	 * allow for msgbuf
564 	 */
565 	sz = round_page(MSGBUFSIZE);
566 	mp = NULL;
567 
568 	for (mp1 = avail; mp1->size; mp1++) {
569 		if (mp1->size >= sz) {
570 			mp = mp1;
571 		}
572 	}
573 
574 	if (mp == NULL) {
575 		panic("not enough memory?");
576 	}
577 
578 	npgs -= btoc(sz);
579 	msgbuf_paddr = mp->start + mp->size - sz;
580 	mp->size -= sz;
581 
582 	if (mp->size <= 0) {
583 		bcopy(mp + 1, mp, (cnt - (mp - avail)) * sizeof *mp);
584 	}
585 #endif
586 
587 	/*
588 	 * Initialize kernel pmap and hardware.
589 	 */
590 	kernel_pmap = &kernel_pmap_store;
591 
592 	{
593 		int	batu, batl;
594 
595 		batu = 0x80001ffe;
596 		batl = 0x80000012;
597 
598 		__asm ("mtdbatu 1,%0; mtdbatl 1,%1" :: "r" (batu), "r" (batl));
599 	}
600 
601 
602 #if NPMAPS >= KERNEL_SEGMENT / 16
603 	usedsr[KERNEL_SEGMENT / 16 / (sizeof usedsr[0] * 8)]
604 		|= 1 << ((KERNEL_SEGMENT / 16) % (sizeof usedsr[0] * 8));
605 #endif
606 
607 #if 0 /* XXX */
608 	for (i = 0; i < 16; i++) {
609 		kernel_pmap->pm_sr[i] = EMPTY_SEGMENT;
610 		__asm __volatile ("mtsrin %0,%1"
611 			      :: "r"(EMPTY_SEGMENT), "r"(i << ADDR_SR_SHFT));
612 	}
613 #endif
614 
615 	for (i = 0; i < 16; i++) {
616 		int	j;
617 
618 		__asm __volatile ("mfsrin %0,%1"
619 			: "=r" (j)
620 			: "r" (i << ADDR_SR_SHFT));
621 
622 		kernel_pmap->pm_sr[i] = j;
623 	}
624 
625 	kernel_pmap->pm_sr[KERNEL_SR] = KERNEL_SEGMENT;
626 	__asm __volatile ("mtsr %0,%1"
627 		      :: "n"(KERNEL_SR), "r"(KERNEL_SEGMENT));
628 
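	/*
	 * SDR1 takes the physical base of the hash table in its upper
	 * bits and a mask of the extra hash bits (HTABMASK) in its low
	 * bits; since the minimum table size is 1024 PTEGs,
	 * ptab_mask >> 10 yields the mask bits for larger tables.
	 */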
629 	__asm __volatile ("sync; mtsdr1 %0; isync"
630 		      :: "r"((u_int)ptable | (ptab_mask >> 10)));
631 
632 	tlbia();
633 
634 	nextavail = avail->start;
635 	avail_start = avail->start;
636 	for (mp = avail, i = 0; mp->size; mp++) {
637 		avail_end = mp->start + mp->size;
638 		phys_avail[i++] = mp->start;
639 		phys_avail[i++] = mp->start + mp->size;
640 	}
641 
642 	virtual_avail = VM_MIN_KERNEL_ADDRESS;
643 	virtual_end = VM_MAX_KERNEL_ADDRESS;
644 }
645 
646 /*
647  * Initialize anything else for pmap handling.
648  * Called during vm_init().
649  */
650 void
651 pmap_init(vm_offset_t phys_start, vm_offset_t phys_end)
652 {
653 	int	initial_pvs;
654 
655 	/*
656 	 * init the pv free list
657 	 */
658 	initial_pvs = vm_page_array_size;
659 	if (initial_pvs < MINPV) {
660 		initial_pvs = MINPV;
661 	}
662 	pvzone = &pvzone_store;
663 	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
664 	    initial_pvs * sizeof(struct pv_entry));
665 	zbootinit(pvzone, "PV ENTRY", sizeof(struct pv_entry), pvinit,
666 	    vm_page_array_size);
667 
668 	pmap_initialized = TRUE;
669 }
670 
671 /*
672  * Initialize a preallocated and zeroed pmap structure.
673  */
674 void
675 pmap_pinit(struct pmap *pm)
676 {
677 	int	i, j;
678 
679 	/*
680 	 * Allocate some segment registers for this pmap.
681 	 */
682 	pm->pm_refs = 1;
683 	for (i = 0; i < sizeof usedsr / sizeof usedsr[0]; i++) {
684 		if (usedsr[i] != 0xffffffff) {
685 			j = ffs(~usedsr[i]) - 1;
686 			usedsr[i] |= 1 << j;
687 			pm->pm_sr[0] = (i * sizeof usedsr[0] * 8 + j) * 16;
688 			for (i = 1; i < 16; i++) {
689 				pm->pm_sr[i] = pm->pm_sr[i - 1] + 1;
690 			}
691 			return;
692 		}
693 	}
694 	panic("out of segments");
695 }
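
/*
 * Note on the segment allocation above: usedsr[] has one bit per block of
 * 16 consecutive segment values, so word i, bit j corresponds to
 * pm_sr[0] == (i * 32 + j) * 16 (32 bits per u_int word), and the
 * remaining 15 segment registers simply count up from pm_sr[0].
 */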
696 
697 void
698 pmap_pinit2(pmap_t pmap)
699 {
700 
701 	/*
702 	 * Nothing to be done.
703 	 */
704 	return;
705 }
706 
707 /*
708  * Add a reference to the given pmap.
709  */
710 void
711 pmap_reference(struct pmap *pm)
712 {
713 
714 	pm->pm_refs++;
715 }
716 
717 /*
718  * Retire the given pmap from service.
719  * Should only be called if the map contains no valid mappings.
720  */
721 void
722 pmap_destroy(struct pmap *pm)
723 {
724 
725 	if (--pm->pm_refs == 0) {
726 		pmap_release(pm);
727 		free((caddr_t)pm, M_VMPGDATA);
728 	}
729 }
730 
731 /*
732  * Release any resources held by the given physical map.
733  * Called when a pmap initialized by pmap_pinit is being released.
734  */
735 void
736 pmap_release(struct pmap *pm)
737 {
738 	int	i, j;
739 
740 	if (!pm->pm_sr[0]) {
741 		panic("pmap_release");
742 	}
743 	i = pm->pm_sr[0] / 16;
744 	j = i % (sizeof usedsr[0] * 8);
745 	i /= sizeof usedsr[0] * 8;
746 	usedsr[i] &= ~(1 << j);
747 }
748 
749 /*
750  * Copy the range specified by src_addr/len
751  * from the source map to the range dst_addr/len
752  * in the destination map.
753  *
754  * This routine is only advisory and need not do anything.
755  */
756 void
757 pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vm_offset_t dst_addr,
758     vm_size_t len, vm_offset_t src_addr)
759 {
760 
761 	return;
762 }
763 
764 /*
765  * Garbage collects the physical map system for
766  * pages which are no longer used.
767  * Success need not be guaranteed -- that is, there
768  * may well be pages which are not referenced, but
769  * others may be collected.
770  * Called by the pageout daemon when pages are scarce.
771  */
772 void
773 pmap_collect(void)
774 {
775 
776 	return;
777 }
778 
779 /*
780  * Fill the given physical page with zeroes.
781  */
782 void
783 pmap_zero_page(vm_offset_t pa)
784 {
785 #if 0
786 	bzero((caddr_t)pa, PAGE_SIZE);
787 #else
788 	int	i;
789 
790 	for (i = PAGE_SIZE/CACHELINESIZE; i > 0; i--) {
791 		__asm __volatile ("dcbz 0,%0" :: "r"(pa));
792 		pa += CACHELINESIZE;
793 	}
794 #endif
795 }
796 
797 void
798 pmap_zero_page_area(vm_offset_t pa, int off, int size)
799 {
800 
801 	bzero((caddr_t)pa + off, size);
802 }
803 
804 /*
805  * Copy the given physical source page to its destination.
806  */
807 void
808 pmap_copy_page(vm_offset_t src, vm_offset_t dst)
809 {
810 
811 	bcopy((caddr_t)src, (caddr_t)dst, PAGE_SIZE);
812 }
813 
814 static struct pv_entry *
815 pmap_alloc_pv()
816 {
817 	pv_entry_count++;
818 
819 	if (pv_entry_high_water &&
820 	    (pv_entry_count > pv_entry_high_water) &&
821 	    (pmap_pagedaemon_waken == 0)) {
822 		pmap_pagedaemon_waken = 1;
823 		wakeup(&vm_pages_needed);
824 	}
825 
826 	return zalloc(pvzone);
827 }
828 
829 static void
830 pmap_free_pv(struct pv_entry *pv)
831 {
832 
833 	pv_entry_count--;
834 	zfree(pvzone, pv);
835 }
836 
837 /*
838  * We really hope that we don't need overflow entries
839  * before the VM system is initialized!
840  *
841  * XXX: Should really be switched over to the zone allocator.
842  */
843 static struct pte_ovfl *
844 poalloc()
845 {
846 	struct po_page	*pop;
847 	struct pte_ovfl	*po;
848 	vm_page_t	mem;
849 	int		i;
850 
851 	if (!pmap_initialized) {
852 		panic("poalloc");
853 	}
854 
855 	if (po_nfree == 0) {
856 		/*
857 		 * Since we cannot use maps for potable allocation,
858 		 * we have to steal some memory from the VM system.			XXX
859 		 */
860 		mem = vm_page_alloc(NULL, 0, VM_ALLOC_SYSTEM);
861 		po_pcnt++;
862 		pop = (struct po_page *)VM_PAGE_TO_PHYS(mem);
863 		pop->pop_pgi.pgi_page = mem;
864 		LIST_INIT(&pop->pop_pgi.pgi_freelist);
865 		for (i = NPOPPG - 1, po = pop->pop_po + 1; --i >= 0; po++) {
866 			LIST_INSERT_HEAD(&pop->pop_pgi.pgi_freelist, po,
867 			    po_list);
868 		}
869 		po_nfree += pop->pop_pgi.pgi_nfree = NPOPPG - 1;
870 		LIST_INSERT_HEAD(&po_page_freelist, pop, pop_pgi.pgi_list);
871 		po = pop->pop_po;
872 	} else {
873 		po_nfree--;
874 		pop = po_page_freelist.lh_first;
875 		if (--pop->pop_pgi.pgi_nfree <= 0) {
876 			LIST_REMOVE(pop, pop_pgi.pgi_list);
877 		}
878 		po = pop->pop_pgi.pgi_freelist.lh_first;
879 		LIST_REMOVE(po, po_list);
880 	}
881 
882 	return po;
883 }
884 
885 static void
886 pofree(struct pte_ovfl *po, int freepage)
887 {
888 	struct po_page	*pop;
889 
890 	pop = (struct po_page *)trunc_page((vm_offset_t)po);
891 	switch (++pop->pop_pgi.pgi_nfree) {
892 	case NPOPPG:
893 		if (!freepage) {
894 			break;
895 		}
896 		po_nfree -= NPOPPG - 1;
897 		po_pcnt--;
898 		LIST_REMOVE(pop, pop_pgi.pgi_list);
899 		vm_page_free(pop->pop_pgi.pgi_page);
900 		return;
901 	case 1:
902 		LIST_INSERT_HEAD(&po_page_freelist, pop, pop_pgi.pgi_list);
903 	default:
904 		break;
905 	}
906 	LIST_INSERT_HEAD(&pop->pop_pgi.pgi_freelist, po, po_list);
907 	po_nfree++;
908 }
909 
910 /*
911  * This returns whether this is the first mapping of a page.
912  */
913 static int
914 pmap_enter_pv(int pteidx, vm_offset_t va, vm_offset_t pa)
915 {
916 	struct pv_entry	*pv, *npv;
917 	int		s, first;
918 
919 	if (!pmap_initialized) {
920 		return 0;
921 	}
922 
923 	s = splimp();
924 
925 	pv = pa_to_pv(pa);
926 	first = (pv->pv_idx == -1);
927 	if (pv->pv_idx == -1) {
928 		/*
929 		 * No entries yet, use header as the first entry.
930 		 */
931 		pv->pv_va = va;
932 		pv->pv_idx = pteidx;
933 		pv->pv_next = NULL;
934 	} else {
935 		/*
936 		 * There is at least one other VA mapping this page.
937 		 * Place this entry after the header.
938 		 */
939 		npv = pmap_alloc_pv();
940 		npv->pv_va = va;
941 		npv->pv_idx = pteidx;
942 		npv->pv_next = pv->pv_next;
943 		pv->pv_next = npv;
944 	}
945 	splx(s);
946 	return first;
947 }
948 
949 static void
950 pmap_remove_pv(int pteidx, vm_offset_t va, vm_offset_t pa, struct pte *pte)
951 {
952 	struct pv_entry	*pv, *npv;
953 	char		*attr;
954 
955 	/*
956 	 * First transfer reference/change bits to cache.
957 	 */
958 	attr = pa_to_attr(pa);
959 	if (attr == NULL) {
960 		return;
961 	}
962 	*attr |= (pte->pte_lo & (PTE_REF | PTE_CHG)) >> ATTRSHFT;
963 
964 	/*
965 	 * Remove from the PV table.
966 	 */
967 	pv = pa_to_pv(pa);
968 
969 	/*
970 	 * If it is the first entry on the list, it is actually
971 	 * in the header and we must copy the following entry up
972 	 * to the header.  Otherwise we must search the list for
973 	 * the entry.  In either case we free the now unused entry.
974 	 */
975 	if (pteidx == pv->pv_idx && va == pv->pv_va) {
976 		npv = pv->pv_next;
977 		if (npv) {
978 			*pv = *npv;
979 			pmap_free_pv(npv);
980 		} else {
981 			pv->pv_idx = -1;
982 		}
983 	} else {
984 		for (; (npv = pv->pv_next); pv = npv) {
985 			if (pteidx == npv->pv_idx && va == npv->pv_va) {
986 				break;
987 			}
988 		}
989 		if (npv) {
990 			pv->pv_next = npv->pv_next;
991 			pmap_free_pv(npv);
992 		}
993 #ifdef	DIAGNOSTIC
994 		else {
995 			panic("pmap_remove_pv: not on list\n");
996 		}
997 #endif
998 	}
999 }
1000 
1001 /*
1002  * Insert physical page at pa into the given pmap at virtual address va.
1003  */
1004 void
1005 pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t pg, vm_prot_t prot,
1006     boolean_t wired)
1007 {
1008 	sr_t			sr;
1009 	int			idx, s;
1010 	pte_t			pte;
1011 	struct pte_ovfl		*po;
1012 	struct mem_region	*mp;
1013 	vm_offset_t		pa;
1014 
1015 	pa = VM_PAGE_TO_PHYS(pg) & ~PAGE_MASK;
1016 
1017 	/*
1018 	 * Have to remove any existing mapping first.
1019 	 */
1020 	pmap_remove(pm, va, va + PAGE_SIZE);
1021 
1022 	/*
1023 	 * Compute the HTAB index.
1024 	 */
1025 	idx = pteidx(sr = ptesr(pm->pm_sr, va), va);
1026 	/*
1027 	 * Construct the PTE.
1028 	 *
1029 	 * Note: Don't set the valid bit for correct operation of tlb update.
1030 	 */
1031 	pte.pte_hi = ((sr & SR_VSID) << PTE_VSID_SHFT)
1032 		| ((va & ADDR_PIDX) >> ADDR_API_SHFT);
1033 	pte.pte_lo = (pa & PTE_RPGN) | PTE_M | PTE_I | PTE_G;
1034 
1035 	for (mp = mem; mp->size; mp++) {
1036 		if (pa >= mp->start && pa < mp->start + mp->size) {
1037 			pte.pte_lo &= ~(PTE_I | PTE_G);
1038 			break;
1039 		}
1040 	}
1041 	if (prot & VM_PROT_WRITE) {
1042 		pte.pte_lo |= PTE_RW;
1043 	} else {
1044 		pte.pte_lo |= PTE_RO;
1045 	}
1046 
1047 	/*
1048 	 * Now record mapping for later back-translation.
1049 	 */
1050 	if (pmap_initialized && (pg->flags & PG_FICTITIOUS) == 0) {
1051 		if (pmap_enter_pv(idx, va, pa)) {
1052 			/*
1053 			 * Flush the real memory from the cache.
1054 			 */
1055 			__syncicache((void *)pa, PAGE_SIZE);
1056 		}
1057 	}
1058 
1059 	s = splimp();
1060 	pm->pm_stats.resident_count++;
1061 	/*
1062 	 * Try to insert directly into HTAB.
1063 	 */
1064 	if (pte_insert(idx, &pte)) {
1065 		splx(s);
1066 		return;
1067 	}
1068 
1069 	/*
1070 	 * Have to allocate overflow entry.
1071 	 *
1072 	 * Note, that we must use real addresses for these.
1073 	 */
1074 	po = poalloc();
1075 	po->po_pte = pte;
1076 	LIST_INSERT_HEAD(potable + idx, po, po_list);
1077 	splx(s);
1078 }
1079 
1080 void
1081 pmap_kenter(vm_offset_t va, vm_offset_t pa)
1082 {
1083 	struct vm_page	pg;
1084 
1085 	pg.phys_addr = pa;
1086 	pmap_enter(kernel_pmap, va, &pg, VM_PROT_READ|VM_PROT_WRITE, TRUE);
1087 }
1088 
1089 void
1090 pmap_kremove(vm_offset_t va)
1091 {
1092 	pmap_remove(kernel_pmap, va, va + PAGE_SIZE);
1093 }
1094 
1095 /*
1096  * Remove the given range of mapping entries.
1097  */
1098 void
1099 pmap_remove(struct pmap *pm, vm_offset_t va, vm_offset_t endva)
1100 {
1101 	int		idx, i, s;
1102 	sr_t		sr;
1103 	pte_t		*ptp;
1104 	struct pte_ovfl	*po, *npo;
1105 
1106 	s = splimp();
1107 	while (va < endva) {
1108 		idx = pteidx(sr = ptesr(pm->pm_sr, va), va);
1109 		for (ptp = ptable + idx * 8, i = 8; --i >= 0; ptp++) {
1110 			if (ptematch(ptp, sr, va, PTE_VALID)) {
1111 				pmap_remove_pv(idx, va, ptp->pte_lo, ptp);
1112 				ptp->pte_hi &= ~PTE_VALID;
1113 				__asm __volatile ("sync");
1114 				tlbie(va);
1115 				tlbsync();
1116 				pm->pm_stats.resident_count--;
1117 			}
1118 		}
1119 		for (ptp = ptable + (idx ^ ptab_mask) * 8, i = 8; --i >= 0;
1120 		    ptp++) {
1121 			if (ptematch(ptp, sr, va, PTE_VALID | PTE_HID)) {
1122 				pmap_remove_pv(idx, va, ptp->pte_lo, ptp);
1123 				ptp->pte_hi &= ~PTE_VALID;
1124 				__asm __volatile ("sync");
1125 				tlbie(va);
1126 				tlbsync();
1127 				pm->pm_stats.resident_count--;
1128 			}
1129 		}
1130 		for (po = potable[idx].lh_first; po; po = npo) {
1131 			npo = po->po_list.le_next;
1132 			if (ptematch(&po->po_pte, sr, va, 0)) {
1133 				pmap_remove_pv(idx, va, po->po_pte.pte_lo,
1134 					       &po->po_pte);
1135 				LIST_REMOVE(po, po_list);
1136 				pofree(po, 1);
1137 				pm->pm_stats.resident_count--;
1138 			}
1139 		}
1140 		va += PAGE_SIZE;
1141 	}
1142 	splx(s);
1143 }
1144 
1145 static pte_t *
1146 pte_find(struct pmap *pm, vm_offset_t va)
1147 {
1148 	int		idx, i;
1149 	sr_t		sr;
1150 	pte_t		*ptp;
1151 	struct pte_ovfl	*po;
1152 
1153 	idx = pteidx(sr = ptesr(pm->pm_sr, va), va);
1154 	for (ptp = ptable + idx * 8, i = 8; --i >= 0; ptp++) {
1155 		if (ptematch(ptp, sr, va, PTE_VALID)) {
1156 			return ptp;
1157 		}
1158 	}
1159 	for (ptp = ptable + (idx ^ ptab_mask) * 8, i = 8; --i >= 0; ptp++) {
1160 		if (ptematch(ptp, sr, va, PTE_VALID | PTE_HID)) {
1161 			return ptp;
1162 		}
1163 	}
1164 	for (po = potable[idx].lh_first; po; po = po->po_list.le_next) {
1165 		if (ptematch(&po->po_pte, sr, va, 0)) {
1166 			return &po->po_pte;
1167 		}
1168 	}
1169 	return 0;
1170 }
1171 
1172 /*
1173  * Get the physical page address for the given pmap/virtual address.
1174  */
1175 vm_offset_t
1176 pmap_extract(pmap_t pm, vm_offset_t va)
1177 {
1178 	pte_t	*ptp;
1179 	int	s;
1180 
1181 	s = splimp();
1182 
1183 	if (!(ptp = pte_find(pm, va))) {
1184 		splx(s);
1185 		return (0);
1186 	}
1187 	splx(s);
1188 	return ((ptp->pte_lo & PTE_RPGN) | (va & ADDR_POFF));
1189 }
1190 
1191 /*
1192  * Lower the protection on the specified range of this pmap.
1193  *
1194  * There are only two cases: either the protection is going to 0,
1195  * or it is going to read-only.
1196  */
1197 void
1198 pmap_protect(struct pmap *pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1199 {
1200 	pte_t	*ptp;
1201 	int	valid, s;
1202 
1203 	if (prot & VM_PROT_READ) {
1204 		s = splimp();
1205 		while (sva < eva) {
1206 			ptp = pte_find(pm, sva);
1207 			if (ptp) {
1208 				valid = ptp->pte_hi & PTE_VALID;
1209 				ptp->pte_hi &= ~PTE_VALID;
1210 				__asm __volatile ("sync");
1211 				tlbie(sva);
1212 				tlbsync();
1213 				ptp->pte_lo &= ~PTE_PP;
1214 				ptp->pte_lo |= PTE_RO;
1215 				__asm __volatile ("sync");
1216 				ptp->pte_hi |= valid;
1217 			}
1218 			sva += PAGE_SIZE;
1219 		}
1220 		splx(s);
1221 		return;
1222 	}
1223 	pmap_remove(pm, sva, eva);
1224 }
1225 
1226 boolean_t
1227 ptemodify(vm_page_t pg, u_int mask, u_int val)
1228 {
1229 	vm_offset_t	pa;
1230 	struct pv_entry	*pv;
1231 	pte_t		*ptp;
1232 	struct pte_ovfl	*po;
1233 	int		i, s;
1234 	char		*attr;
1235 	int		rv;
1236 
1237 	pa = VM_PAGE_TO_PHYS(pg);
1238 
1239 	/*
1240 	 * First modify bits in cache.
1241 	 */
1242 	attr = pa_to_attr(pa);
1243 	if (attr == NULL) {
1244 		return FALSE;
1245 	}
1246 
1247 	*attr &= ~mask >> ATTRSHFT;
1248 	*attr |= val >> ATTRSHFT;
1249 
1250 	pv = pa_to_pv(pa);
1251 	if (pv->pv_idx < 0) {
1252 		return FALSE;
1253 	}
1254 
1255 	rv = FALSE;
1256 	s = splimp();
1257 	for (; pv; pv = pv->pv_next) {
1258 		for (ptp = ptable + pv->pv_idx * 8, i = 8; --i >= 0; ptp++) {
1259 			if ((ptp->pte_hi & PTE_VALID)
1260 			    && (ptp->pte_lo & PTE_RPGN) == pa) {
1261 				ptp->pte_hi &= ~PTE_VALID;
1262 				__asm __volatile ("sync");
1263 				tlbie(pv->pv_va);
1264 				tlbsync();
1265 				rv |= ptp->pte_lo & mask;
1266 				ptp->pte_lo &= ~mask;
1267 				ptp->pte_lo |= val;
1268 				__asm __volatile ("sync");
1269 				ptp->pte_hi |= PTE_VALID;
1270 			}
1271 		}
1272 		for (ptp = ptable + (pv->pv_idx ^ ptab_mask) * 8, i = 8;
1273 		    --i >= 0; ptp++) {
1274 			if ((ptp->pte_hi & PTE_VALID)
1275 			    && (ptp->pte_lo & PTE_RPGN) == pa) {
1276 				ptp->pte_hi &= ~PTE_VALID;
1277 				__asm __volatile ("sync");
1278 				tlbie(pv->pv_va);
1279 				tlbsync();
1280 				rv |= ptp->pte_lo & mask;
1281 				ptp->pte_lo &= ~mask;
1282 				ptp->pte_lo |= val;
1283 				__asm __volatile ("sync");
1284 				ptp->pte_hi |= PTE_VALID;
1285 			}
1286 		}
1287 		for (po = potable[pv->pv_idx].lh_first; po;
1288 		    po = po->po_list.le_next) {
1289 			if ((po->po_pte.pte_lo & PTE_RPGN) == pa) {
1290 				rv |= po->po_pte.pte_lo & mask;
1291 				po->po_pte.pte_lo &= ~mask;
1292 				po->po_pte.pte_lo |= val;
1293 			}
1294 		}
1295 	}
1296 	splx(s);
1297 	return rv != 0;
1298 }
1299 
1300 int
1301 ptebits(vm_page_t pg, int bit)
1302 {
1303 	struct pv_entry	*pv;
1304 	pte_t		*ptp;
1305 	struct pte_ovfl	*po;
1306 	int		i, s, bits;
1307 	char		*attr;
1308 	vm_offset_t	pa;
1309 
1310 	bits = 0;
1311 	pa = VM_PAGE_TO_PHYS(pg);
1312 
1313 	/*
1314 	 * First try the cache.
1315 	 */
1316 	attr = pa_to_attr(pa);
1317 	if (attr == NULL) {
1318 		return 0;
1319 	}
1320 	bits |= (*attr << ATTRSHFT) & bit;
1321 	if (bits == bit) {
1322 		return bits;
1323 	}
1324 
1325 	pv = pa_to_pv(pa);
1326 	if (pv->pv_idx < 0) {
1327 		return 0;
1328 	}
1329 
1330 	s = splimp();
1331 	for (; pv; pv = pv->pv_next) {
1332 		for (ptp = ptable + pv->pv_idx * 8, i = 8; --i >= 0; ptp++) {
1333 			if ((ptp->pte_hi & PTE_VALID)
1334 			    && (ptp->pte_lo & PTE_RPGN) == pa) {
1335 				bits |= ptp->pte_lo & bit;
1336 				if (bits == bit) {
1337 					splx(s);
1338 					return bits;
1339 				}
1340 			}
1341 		}
1342 		for (ptp = ptable + (pv->pv_idx ^ ptab_mask) * 8, i = 8;
1343 		    --i >= 0; ptp++) {
1344 			if ((ptp->pte_hi & PTE_VALID)
1345 			    && (ptp->pte_lo & PTE_RPGN) == pa) {
1346 				bits |= ptp->pte_lo & bit;
1347 				if (bits == bit) {
1348 					splx(s);
1349 					return bits;
1350 				}
1351 			}
1352 		}
1353 		for (po = potable[pv->pv_idx].lh_first; po;
1354 		    po = po->po_list.le_next) {
1355 			if ((po->po_pte.pte_lo & PTE_RPGN) == pa) {
1356 				bits |= po->po_pte.pte_lo & bit;
1357 				if (bits == bit) {
1358 					splx(s);
1359 					return bits;
1360 				}
1361 			}
1362 		}
1363 	}
1364 	splx(s);
1365 	return bits;
1366 }
1367 
1368 /*
1369  * Lower the protection on the specified physical page.
1370  *
1371  * There are only two cases: either the protection is going to 0,
1372  * or it is going to read-only.
1373  */
1374 void
1375 pmap_page_protect(vm_page_t m, vm_prot_t prot)
1376 {
1377 	vm_offset_t	pa;
1378 	vm_offset_t	va;
1379 	pte_t		*ptp;
1380 	struct pte_ovfl	*po, *npo;
1381 	int		i, s, idx;
1382 	struct pv_entry	*pv;
1383 
1384 	pa = VM_PAGE_TO_PHYS(m);
1385 
1386 	pa &= ~ADDR_POFF;
1387 	if (prot & VM_PROT_READ) {
1388 		ptemodify(m, PTE_PP, PTE_RO);
1389 		return;
1390 	}
1391 
1392 	pv = pa_to_pv(pa);
1393 	if (pv == NULL) {
1394 		return;
1395 	}
1396 
1397 	s = splimp();
1398 	while (pv->pv_idx >= 0) {
1399 		idx = pv->pv_idx;
1400 		va = pv->pv_va;
1401 		for (ptp = ptable + idx * 8, i = 8; --i >= 0; ptp++) {
1402 			if ((ptp->pte_hi & PTE_VALID)
1403 			    && (ptp->pte_lo & PTE_RPGN) == pa) {
1404 				pmap_remove_pv(idx, va, pa, ptp);
1405 				ptp->pte_hi &= ~PTE_VALID;
1406 				__asm __volatile ("sync");
1407 				tlbie(va);
1408 				tlbsync();
1409 				goto next;
1410 			}
1411 		}
1412 		for (ptp = ptable + (idx ^ ptab_mask) * 8, i = 8; --i >= 0;
1413 		    ptp++) {
1414 			if ((ptp->pte_hi & PTE_VALID)
1415 			    && (ptp->pte_lo & PTE_RPGN) == pa) {
1416 				pmap_remove_pv(idx, va, pa, ptp);
1417 				ptp->pte_hi &= ~PTE_VALID;
1418 				__asm __volatile ("sync");
1419 				tlbie(va);
1420 				tlbsync();
1421 				goto next;
1422 			}
1423 		}
1424 		for (po = potable[idx].lh_first; po; po = npo) {
1425 			npo = po->po_list.le_next;
1426 			if ((po->po_pte.pte_lo & PTE_RPGN) == pa) {
1427 				pmap_remove_pv(idx, va, pa, &po->po_pte);
1428 				LIST_REMOVE(po, po_list);
1429 				pofree(po, 1);
1430 				goto next;
1431 			}
1432 		}
1433 next:		;
1434 	}
1435 	splx(s);
1436 }
1437 
1438 /*
1439  * Activate the address space for the specified process.  If the process
1440  * is the current process, load the new MMU context.
1441  */
1442 void
1443 pmap_activate(struct proc *p)
1444 {
1445 	struct pcb	*pcb;
1446 	pmap_t		pmap;
1447 	pmap_t		rpm;
1448 	int		psl, i, ksr, seg;
1449 
1450 	pcb = &p->p_addr->u_pcb;
1451 	pmap = p->p_vmspace->vm_map.pmap;
1452 
1453 	/*
1454 	 * XXX Normally performed in cpu_fork().
1455 	 */
1456 	if (pcb->pcb_pm != pmap) {
1457 		pcb->pcb_pm = pmap;
1458 		(vm_offset_t) pcb->pcb_pmreal = pmap_extract(kernel_pmap,
1459 		    (vm_offset_t)pcb->pcb_pm);
1460 	}
1461 
1462 	if (p == curproc) {
1463 		/* Disable interrupts while switching. */
1464 		psl = mfmsr();
1465 		mtmsr(psl & ~PSL_EE);
1466 
1467 #if 0 /* XXX */
1468 		/* Store pointer to new current pmap. */
1469 		curpm = pcb->pcb_pmreal;
1470 #endif
1471 
1472 		/* Save kernel SR. */
1473 		__asm __volatile("mfsr %0,14" : "=r"(ksr) :);
1474 
1475 		/*
1476 		 * Set new segment registers.  We use the pmap's real
1477 		 * address to avoid accessibility problems.
1478 		 */
1479 		rpm = pcb->pcb_pmreal;
1480 		for (i = 0; i < 16; i++) {
1481 			seg = rpm->pm_sr[i];
1482 			__asm __volatile("mtsrin %0,%1"
1483 			    :: "r"(seg), "r"(i << ADDR_SR_SHFT));
1484 		}
1485 
1486 		/* Restore kernel SR. */
1487 		__asm __volatile("mtsr 14,%0" :: "r"(ksr));
1488 
1489 		/* Interrupts are OK again. */
1490 		mtmsr(psl);
1491 	}
1492 }
1493 
1494 /*
1495  * Add a list of wired pages to the kva.
1496  * This routine is only used for temporary
1497  * kernel mappings that do not need to have
1498  * page modification or references recorded.
1499  * Note that old mappings are simply written
1500  * over.  The page *must* be wired.
1501  */
1502 void
1503 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
1504 {
1505 	int	i;
1506 
1507 	for (i = 0; i < count; i++) {
1508 		vm_offset_t tva = va + i * PAGE_SIZE;
1509 		pmap_kenter(tva, VM_PAGE_TO_PHYS(m[i]));
1510 	}
1511 }
1512 
1513 /*
1514  * This routine jerks page mappings from the
1515  * kernel -- it is meant only for temporary mappings.
1516  */
1517 void
1518 pmap_qremove(vm_offset_t va, int count)
1519 {
1520 	vm_offset_t	end_va;
1521 
1522 	end_va = va + count*PAGE_SIZE;
1523 
1524 	while (va < end_va) {
1525 		unsigned *pte;
1526 
1527 		pte = (unsigned *)vtopte(va);
1528 		*pte = 0;
1529 		tlbie(va);
1530 		va += PAGE_SIZE;
1531 	}
1532 }
1533 
1534 /*
1535  * 	pmap_ts_referenced:
1536  *
1537  *	Return the count of reference bits for a page, clearing all of them.
1538  */
1539 int
1540 pmap_ts_referenced(vm_page_t m)
1541 {
1542 
1543 	/* XXX: coming soon... */
1544 	return (0);
1545 }
1546 
1547 /*
1548  * This routine returns true if a physical page resides
1549  * in the given pmap.
1550  */
1551 boolean_t
1552 pmap_page_exists(pmap_t pmap, vm_page_t m)
1553 {
1554 #if 0 /* XXX: This must go! */
1555 	register pv_entry_t pv;
1556 	int s;
1557 
1558 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
1559 		return FALSE;
1560 
1561 	s = splvm();
1562 
1563 	/*
1564 	 * Not found, check current mappings returning immediately if found.
1565 	 */
1566 	for (pv = pv_table; pv; pv = pv->pv_next) {
1567 		if (pv->pv_pmap == pmap) {
1568 			splx(s);
1569 			return TRUE;
1570 		}
1571 	}
1572 	splx(s);
1573 #endif
1574 	return (FALSE);
1575 }
1576 
1577 /*
1578  *	Used to map a range of physical addresses into kernel
1579  *	virtual address space.
1580  *
1581  *	For now, VM is already on, we only need to map the
1582  *	specified memory.
1583  */
1584 vm_offset_t
1585 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
1586 {
1587 	vm_offset_t	sva, va;
1588 
1589 	sva = *virt;
1590 	va = sva;
1591 
1592 	while (start < end) {
1593 		pmap_kenter(va, start);
1594 		va += PAGE_SIZE;
1595 		start += PAGE_SIZE;
1596 	}
1597 
1598 	*virt = va;
1599 	return (sva);
1600 }
1601 
1602 vm_offset_t
1603 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
1604 {
1605 
1606 	return (addr);
1607 }
1608 
1609 int
1610 pmap_mincore(pmap_t pmap, vm_offset_t addr)
1611 {
1612 
1613 	/* XXX: coming soon... */
1614 	return (0);
1615 }
1616 
1617 void
1618 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
1619     vm_pindex_t pindex, vm_size_t size, int limit)
1620 {
1621 
1622 	/* XXX: coming soon... */
1623 	return;
1624 }
1625 
1626 void
1627 pmap_growkernel(vm_offset_t addr)
1628 {
1629 
1630 	/* XXX: coming soon... */
1631 	return;
1632 }
1633 
1634 /*
1635  * Initialize the address space (zone) for the pv_entries.  Set a
1636  * high water mark so that the system can recover from excessive
1637  * numbers of pv entries.
1638  */
1639 void
1640 pmap_init2()
1641 {
1642 	pv_entry_max = PMAP_SHPGPERPROC * maxproc + vm_page_array_size;
1643 	pv_entry_high_water = 9 * (pv_entry_max / 10);
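	/*
	 * For example (hypothetical numbers): with the default
	 * PMAP_SHPGPERPROC of 200 and maxproc = 512, pv_entry_max comes
	 * to 102400 plus vm_page_array_size, and the high-water mark is
	 * 90% of that.
	 */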
1644 	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
1645 }
1646 
1647 void
1648 pmap_swapin_proc(struct proc *p)
1649 {
1650 
1651 	/* XXX: coming soon... */
1652 	return;
1653 }
1654 
1655 void
1656 pmap_swapout_proc(struct proc *p)
1657 {
1658 
1659 	/* XXX: coming soon... */
1660 	return;
1661 }
1662 
1663 void
1664 pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, boolean_t pageable)
1665 {
1666 
1667 	return;
1668 }
1669 
1670 void
1671 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
1672 {
1673 
1674 	/* XXX: coming soon... */
1675 	return;
1676 }
1677 
1678 void
1679 pmap_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
1680 {
1681 
1682 	/* XXX: coming soon... */
1683 	return;
1684 }
1685 
1686 void
1687 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1688 {
1689 
1690 	/* XXX: coming soon... */
1691 	return;
1692 }
1693 
1694 void
1695 pmap_pinit0(pmap_t pmap)
1696 {
1697 
1698 	/* XXX: coming soon... */
1699 	return;
1700 }
1701 
1702 void
1703 pmap_dispose_proc(struct proc *p)
1704 {
1705 
1706 	/* XXX: coming soon... */
1707 	return;
1708 }
1709 
1710 vm_offset_t
1711 pmap_steal_memory(vm_size_t size)
1712 {
1713 	vm_size_t bank_size;
1714 	vm_offset_t pa;
1715 
1716 	size = round_page(size);
1717 
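	/*
	 * phys_avail[] is a zero-terminated list of (start, end) physical
	 * address pairs set up in pmap_bootstrap(); whenever the first
	 * bank is too small for the request, the whole bank is dropped
	 * and the next one is tried.
	 */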
1718 	bank_size = phys_avail[1] - phys_avail[0];
1719 	while (size > bank_size) {
1720 		int i;
1721 		for (i = 0; phys_avail[i+2]; i+= 2) {
1722 			phys_avail[i] = phys_avail[i+2];
1723 			phys_avail[i+1] = phys_avail[i+3];
1724 		}
1725 		phys_avail[i] = 0;
1726 		phys_avail[i+1] = 0;
1727 		if (!phys_avail[0])
1728 			panic("pmap_steal_memory: out of memory");
1729 		bank_size = phys_avail[1] - phys_avail[0];
1730 	}
1731 
1732 	pa = phys_avail[0];
1733 	phys_avail[0] += size;
1734 
1735 	bzero((caddr_t) pa, size);
1736 	return pa;
1737 }
1738 
1739 /*
1740  * Create the UPAGES for a new process.
1741  * This routine directly affects the fork perf for a process.
1742  */
1743 void
1744 pmap_new_proc(struct proc *p)
1745 {
1746 	int		i;
1747 	vm_object_t	upobj;
1748 	vm_page_t	m;
1749 	struct user	*up;
1750 	pte_t		pte;
1751 	sr_t		sr;
1752 	int		idx;
1753 
1754 	/*
1755 	 * allocate object for the upages
1756 	 */
1757 	if ((upobj = p->p_upages_obj) == NULL) {
1758 		upobj = vm_object_allocate(OBJT_DEFAULT, UPAGES);
1759 		p->p_upages_obj = upobj;
1760 	}
1761 
1762 	/* get a kernel virtual address for the UPAGES for this proc */
1763 	if ((up = p->p_addr) == NULL) {
1764 		up = (struct user *) kmem_alloc_nofault(kernel_map,
1765 				UPAGES * PAGE_SIZE);
1766 		if (up == NULL)
1767 			panic("pmap_new_proc: u_map allocation failed");
1768 		p->p_addr = up;
1769 	}
1770 
1771 	for (i = 0; i < UPAGES; i++) {
1772 		vm_offset_t	va;
1773 
1774 		/*
1775 		 * Get a kernel stack page
1776 		 */
1777 		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1778 
1779 		/*
1780 		 * Wire the page
1781 		 */
1782 		m->wire_count++;
1783 		cnt.v_wire_count++;
1784 
1785 		/*
1786 		 * Enter the page into the kernel address space.
1787 		 */
1788 		va = (vm_offset_t)up + i * PAGE_SIZE;
1789 		idx = pteidx(sr = ptesr(kernel_pmap->pm_sr, va), va);
1790 
1791 		pte.pte_hi = ((sr & SR_VSID) << PTE_VSID_SHFT)
1792 		    | ((va & ADDR_PIDX) >> ADDR_API_SHFT);
1793 		pte.pte_lo = (VM_PAGE_TO_PHYS(m) & PTE_RPGN) | PTE_M | PTE_I |
1794 		    PTE_G | PTE_RW;
1795 
1796 		if (!pte_insert(idx, &pte)) {
1797 			struct pte_ovfl	*po;
1798 
1799 			po = poalloc();
1800 			po->po_pte = pte;
1801 			LIST_INSERT_HEAD(potable + idx, po, po_list);
1802 		}
1803 
1804 		tlbie(va);
1805 
1806 		vm_page_wakeup(m);
1807 		vm_page_flag_clear(m, PG_ZERO);
1808 		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
1809 		m->valid = VM_PAGE_BITS_ALL;
1810 	}
1811 }
1812