xref: /freebsd/sys/powerpc/aim/mmu_oea64.c (revision 67ca7330cf34a789afbbff9ae7e4cdc4a4917ae3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008-2015 Nathan Whitehorn
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  * Manages physical address maps.
34  *
35  * Since the information managed by this module is also stored by the
36  * logical address mapping module, this module may throw away valid virtual
37  * to physical mappings at almost any time.  However, invalidations of
38  * mappings must be done as requested.
39  *
40  * In order to cope with hardware architectures which make virtual to
41  * physical map invalidates expensive, this module may delay invalidate or
42  * reduced protection operations until such time as they are actually
43  * necessary.  This module is given full information as to which processors
44  * are currently using which maps, and to when physical maps must be made
45  * correct.
46  */
47 
48 #include "opt_kstack_pages.h"
49 
50 #include <sys/param.h>
51 #include <sys/kernel.h>
52 #include <sys/conf.h>
53 #include <sys/queue.h>
54 #include <sys/cpuset.h>
55 #include <sys/kerneldump.h>
56 #include <sys/ktr.h>
57 #include <sys/lock.h>
58 #include <sys/msgbuf.h>
59 #include <sys/malloc.h>
60 #include <sys/mutex.h>
61 #include <sys/proc.h>
62 #include <sys/rwlock.h>
63 #include <sys/sched.h>
64 #include <sys/sysctl.h>
65 #include <sys/systm.h>
66 #include <sys/vmmeter.h>
67 #include <sys/smp.h>
68 
69 #include <sys/kdb.h>
70 
71 #include <dev/ofw/openfirm.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_param.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_page.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_object.h>
79 #include <vm/vm_extern.h>
80 #include <vm/vm_pageout.h>
81 #include <vm/uma.h>
82 
83 #include <machine/_inttypes.h>
84 #include <machine/cpu.h>
85 #include <machine/platform.h>
86 #include <machine/frame.h>
87 #include <machine/md_var.h>
88 #include <machine/psl.h>
89 #include <machine/bat.h>
90 #include <machine/hid.h>
91 #include <machine/pte.h>
92 #include <machine/sr.h>
93 #include <machine/trap.h>
94 #include <machine/mmuvar.h>
95 
96 #include "mmu_oea64.h"
97 #include "mmu_if.h"
98 #include "moea64_if.h"
99 
100 void moea64_release_vsid(uint64_t vsid);
101 uintptr_t moea64_get_unique_vsid(void);
102 
103 #define DISABLE_TRANS(msr)	do { (msr) = mfmsr(); mtmsr((msr) & ~PSL_DR); } while (0)
104 #define ENABLE_TRANS(msr)	mtmsr(msr)
105 
106 #define	VSID_MAKE(sr, hash)	((sr) | (((hash) & 0xfffff) << 4))
107 #define	VSID_TO_HASH(vsid)	(((vsid) >> 4) & 0xfffff)
108 #define	VSID_HASH_MASK		0x0000007fffffffffULL
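/*
 * For example, VSID_MAKE() packs a 20-bit hash above the low four bits that
 * carry the sr argument: VSID_MAKE(0x3, 0x12345) yields 0x123453, and
 * VSID_TO_HASH(0x123453) recovers 0x12345.
 */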
109 
110 /*
111  * Locking semantics:
112  *
113  * There are two locks of interest: the page locks and the pmap locks, which
114  * protect their individual PVO lists and are locked in that order. The contents
115  * of all PVO entries are protected by the locks of their respective pmaps.
116  * The pmap of any PVO is guaranteed not to change so long as the PVO is linked
117  * into any list.
118  *
119  */
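
/*
 * A typical acquire sequence, following the order above (see moea64_enter()
 * and moea64_remove_write() below for real uses):
 *
 *	PV_PAGE_LOCK(m);
 *	PMAP_LOCK(pmap);
 *	... manipulate the PVO lists and page table entries ...
 *	PV_PAGE_UNLOCK(m);
 *	PMAP_UNLOCK(pmap);
 */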
120 
121 #define PV_LOCK_PER_DOM	(PA_LOCK_COUNT * 3)
122 #define PV_LOCK_COUNT	(PV_LOCK_PER_DOM * MAXMEMDOM)
123 static struct mtx_padalign pv_lock[PV_LOCK_COUNT];
124 
125 /*
126  * Cheap NUMA-izing of the pv locks, to reduce contention across domains.
127  * NUMA domains on POWER9 appear to be indexed as sparse memory spaces, with the
128  * index at (N << 45).
129  */
130 #ifdef __powerpc64__
131 #define PV_LOCK_IDX(pa)	(pa_index(pa) % PV_LOCK_PER_DOM + \
132 			(((pa) >> 45) % MAXMEMDOM) * PV_LOCK_PER_DOM)
133 #else
134 #define PV_LOCK_IDX(pa)	(pa_index(pa) % PV_LOCK_COUNT)
135 #endif
136 #define PV_LOCKPTR(pa)	((struct mtx *)(&pv_lock[PV_LOCK_IDX(pa)]))
137 #define PV_LOCK(pa)		mtx_lock(PV_LOCKPTR(pa))
138 #define PV_UNLOCK(pa)		mtx_unlock(PV_LOCKPTR(pa))
139 #define PV_LOCKASSERT(pa) 	mtx_assert(PV_LOCKPTR(pa), MA_OWNED)
140 #define PV_PAGE_LOCK(m)		PV_LOCK(VM_PAGE_TO_PHYS(m))
141 #define PV_PAGE_UNLOCK(m)	PV_UNLOCK(VM_PAGE_TO_PHYS(m))
142 #define PV_PAGE_LOCKASSERT(m)	PV_LOCKASSERT(VM_PAGE_TO_PHYS(m))
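/*
 * Example: on hardware that follows the (N << 45) domain layout described
 * above, a page at physical address 0x200012345000 sits in domain 1, so
 * PV_LOCK_IDX() selects a mutex from the second block of PV_LOCK_PER_DOM
 * locks; as long as the domain index stays below MAXMEMDOM, pv operations
 * on different domains contend on disjoint sets of locks.
 */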
143 
144 struct ofw_map {
145 	cell_t	om_va;
146 	cell_t	om_len;
147 	uint64_t om_pa;
148 	cell_t	om_mode;
149 };
150 
151 extern unsigned char _etext[];
152 extern unsigned char _end[];
153 
154 extern void *slbtrap, *slbtrapend;
155 
156 /*
157  * Map of physical memory regions.
158  */
159 static struct	mem_region *regions;
160 static struct	mem_region *pregions;
161 static struct	numa_mem_region *numa_pregions;
162 static u_int	phys_avail_count;
163 static int	regions_sz, pregions_sz, numapregions_sz;
164 
165 extern void bs_remap_earlyboot(void);
166 
167 /*
168  * Lock for the SLB tables.
169  */
170 struct mtx	moea64_slb_mutex;
171 
172 /*
173  * PTEG data.
174  */
175 u_long		moea64_pteg_count;
176 u_long		moea64_pteg_mask;
177 
178 /*
179  * PVO data.
180  */
181 
182 uma_zone_t	moea64_pvo_zone; /* zone for pvo entries */
183 
184 static struct	pvo_entry *moea64_bpvo_pool;
185 static int	moea64_bpvo_pool_index = 0;
186 static int	moea64_bpvo_pool_size = 327680;
187 TUNABLE_INT("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size);
188 SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD,
189     &moea64_bpvo_pool_index, 0, "");
190 
191 #define	VSID_NBPW	(sizeof(u_int32_t) * 8)
192 #ifdef __powerpc64__
193 #define	NVSIDS		(NPMAPS * 16)
194 #define VSID_HASHMASK	0xffffffffUL
195 #else
196 #define NVSIDS		NPMAPS
197 #define VSID_HASHMASK	0xfffffUL
198 #endif
199 static u_int	moea64_vsid_bitmap[NVSIDS / VSID_NBPW];
200 
201 static boolean_t moea64_initialized = FALSE;
202 
203 /*
204  * Statistics.
205  */
206 u_int	moea64_pte_valid = 0;
207 u_int	moea64_pte_overflow = 0;
208 u_int	moea64_pvo_entries = 0;
209 u_int	moea64_pvo_enter_calls = 0;
210 u_int	moea64_pvo_remove_calls = 0;
211 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD,
212     &moea64_pte_valid, 0, "");
213 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD,
214     &moea64_pte_overflow, 0, "");
215 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD,
216     &moea64_pvo_entries, 0, "");
217 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD,
218     &moea64_pvo_enter_calls, 0, "");
219 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD,
220     &moea64_pvo_remove_calls, 0, "");
221 
222 vm_offset_t	moea64_scratchpage_va[2];
223 struct pvo_entry *moea64_scratchpage_pvo[2];
224 struct	mtx	moea64_scratchpage_mtx;
225 
226 uint64_t 	moea64_large_page_mask = 0;
227 uint64_t	moea64_large_page_size = 0;
228 int		moea64_large_page_shift = 0;
229 
230 /*
231  * PVO calls.
232  */
233 static int	moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo,
234 		    struct pvo_head *pvo_head);
235 static void	moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo);
236 static void	moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo);
237 static struct	pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t);
238 
239 /*
240  * Utility routines.
241  */
242 static boolean_t	moea64_query_bit(mmu_t, vm_page_t, uint64_t);
243 static u_int		moea64_clear_bit(mmu_t, vm_page_t, uint64_t);
244 static void		moea64_kremove(mmu_t, vm_offset_t);
245 static void		moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va,
246 			    vm_paddr_t pa, vm_size_t sz);
247 static void		moea64_pmap_init_qpages(void);
248 
249 /*
250  * Kernel MMU interface
251  */
252 void moea64_clear_modify(mmu_t, vm_page_t);
253 void moea64_copy_page(mmu_t, vm_page_t, vm_page_t);
254 void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
255     vm_page_t *mb, vm_offset_t b_offset, int xfersize);
256 int moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t,
257     u_int flags, int8_t psind);
258 void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
259     vm_prot_t);
260 void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
261 vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t);
262 vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t);
263 void moea64_init(mmu_t);
264 boolean_t moea64_is_modified(mmu_t, vm_page_t);
265 boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
266 boolean_t moea64_is_referenced(mmu_t, vm_page_t);
267 int moea64_ts_referenced(mmu_t, vm_page_t);
268 vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int);
269 boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t);
270 void moea64_page_init(mmu_t, vm_page_t);
271 int moea64_page_wired_mappings(mmu_t, vm_page_t);
272 void moea64_pinit(mmu_t, pmap_t);
273 void moea64_pinit0(mmu_t, pmap_t);
274 void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
275 void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
276 void moea64_qremove(mmu_t, vm_offset_t, int);
277 void moea64_release(mmu_t, pmap_t);
278 void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
279 void moea64_remove_pages(mmu_t, pmap_t);
280 void moea64_remove_all(mmu_t, vm_page_t);
281 void moea64_remove_write(mmu_t, vm_page_t);
282 void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
283 void moea64_zero_page(mmu_t, vm_page_t);
284 void moea64_zero_page_area(mmu_t, vm_page_t, int, int);
285 void moea64_activate(mmu_t, struct thread *);
286 void moea64_deactivate(mmu_t, struct thread *);
287 void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t);
288 void *moea64_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t);
289 void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t);
290 vm_paddr_t moea64_kextract(mmu_t, vm_offset_t);
291 void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma);
292 void moea64_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t ma);
293 void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t);
294 boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t);
295 static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t);
296 void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz,
297     void **va);
298 void moea64_scan_init(mmu_t mmu);
299 vm_offset_t moea64_quick_enter_page(mmu_t mmu, vm_page_t m);
300 void moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr);
301 static int moea64_map_user_ptr(mmu_t mmu, pmap_t pm,
302     volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen);
303 static int moea64_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr,
304     int *is_user, vm_offset_t *decoded_addr);
305 
306 
307 static mmu_method_t moea64_methods[] = {
308 	MMUMETHOD(mmu_clear_modify,	moea64_clear_modify),
309 	MMUMETHOD(mmu_copy_page,	moea64_copy_page),
310 	MMUMETHOD(mmu_copy_pages,	moea64_copy_pages),
311 	MMUMETHOD(mmu_enter,		moea64_enter),
312 	MMUMETHOD(mmu_enter_object,	moea64_enter_object),
313 	MMUMETHOD(mmu_enter_quick,	moea64_enter_quick),
314 	MMUMETHOD(mmu_extract,		moea64_extract),
315 	MMUMETHOD(mmu_extract_and_hold,	moea64_extract_and_hold),
316 	MMUMETHOD(mmu_init,		moea64_init),
317 	MMUMETHOD(mmu_is_modified,	moea64_is_modified),
318 	MMUMETHOD(mmu_is_prefaultable,	moea64_is_prefaultable),
319 	MMUMETHOD(mmu_is_referenced,	moea64_is_referenced),
320 	MMUMETHOD(mmu_ts_referenced,	moea64_ts_referenced),
321 	MMUMETHOD(mmu_map,     		moea64_map),
322 	MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick),
323 	MMUMETHOD(mmu_page_init,	moea64_page_init),
324 	MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings),
325 	MMUMETHOD(mmu_pinit,		moea64_pinit),
326 	MMUMETHOD(mmu_pinit0,		moea64_pinit0),
327 	MMUMETHOD(mmu_protect,		moea64_protect),
328 	MMUMETHOD(mmu_qenter,		moea64_qenter),
329 	MMUMETHOD(mmu_qremove,		moea64_qremove),
330 	MMUMETHOD(mmu_release,		moea64_release),
331 	MMUMETHOD(mmu_remove,		moea64_remove),
332 	MMUMETHOD(mmu_remove_pages,	moea64_remove_pages),
333 	MMUMETHOD(mmu_remove_all,      	moea64_remove_all),
334 	MMUMETHOD(mmu_remove_write,	moea64_remove_write),
335 	MMUMETHOD(mmu_sync_icache,	moea64_sync_icache),
336 	MMUMETHOD(mmu_unwire,		moea64_unwire),
337 	MMUMETHOD(mmu_zero_page,       	moea64_zero_page),
338 	MMUMETHOD(mmu_zero_page_area,	moea64_zero_page_area),
339 	MMUMETHOD(mmu_activate,		moea64_activate),
340 	MMUMETHOD(mmu_deactivate,      	moea64_deactivate),
341 	MMUMETHOD(mmu_page_set_memattr,	moea64_page_set_memattr),
342 	MMUMETHOD(mmu_quick_enter_page, moea64_quick_enter_page),
343 	MMUMETHOD(mmu_quick_remove_page, moea64_quick_remove_page),
344 
345 	/* Internal interfaces */
346 	MMUMETHOD(mmu_mapdev,		moea64_mapdev),
347 	MMUMETHOD(mmu_mapdev_attr,	moea64_mapdev_attr),
348 	MMUMETHOD(mmu_unmapdev,		moea64_unmapdev),
349 	MMUMETHOD(mmu_kextract,		moea64_kextract),
350 	MMUMETHOD(mmu_kenter,		moea64_kenter),
351 	MMUMETHOD(mmu_kenter_attr,	moea64_kenter_attr),
352 	MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped),
353 	MMUMETHOD(mmu_scan_init,	moea64_scan_init),
354 	MMUMETHOD(mmu_dumpsys_map,	moea64_dumpsys_map),
355 	MMUMETHOD(mmu_map_user_ptr,	moea64_map_user_ptr),
356 	MMUMETHOD(mmu_decode_kernel_ptr, moea64_decode_kernel_ptr),
357 
358 	{ 0, 0 }
359 };
360 
361 MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0);
362 
363 static struct pvo_head *
364 vm_page_to_pvoh(vm_page_t m)
365 {
366 
367 	mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED);
368 	return (&m->md.mdpg_pvoh);
369 }
370 
371 static struct pvo_entry *
372 alloc_pvo_entry(int bootstrap)
373 {
374 	struct pvo_entry *pvo;
375 
376 	if (!moea64_initialized || bootstrap) {
377 		if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) {
378 			panic("alloc_pvo_entry: bpvo pool exhausted, %d, %d, %zd",
379 			      moea64_bpvo_pool_index, moea64_bpvo_pool_size,
380 			      moea64_bpvo_pool_size * sizeof(struct pvo_entry));
381 		}
382 		pvo = &moea64_bpvo_pool[
383 		    atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)];
384 		bzero(pvo, sizeof(*pvo));
385 		pvo->pvo_vaddr = PVO_BOOTSTRAP;
386 	} else {
387 		pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT);
388 		bzero(pvo, sizeof(*pvo));
389 	}
390 
391 	return (pvo);
392 }
393 
394 
395 static void
396 init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va)
397 {
398 	uint64_t vsid;
399 	uint64_t hash;
400 	int shift;
401 
402 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
403 
404 	pvo->pvo_pmap = pmap;
405 	va &= ~ADDR_POFF;
406 	pvo->pvo_vaddr |= va;
407 	vsid = va_to_vsid(pmap, va);
408 	pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
409 	    | (vsid << 16);
410 
411 	shift = (pvo->pvo_vaddr & PVO_LARGE) ? moea64_large_page_shift :
412 	    ADDR_PIDX_SHFT;
413 	hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift);
414 	pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3;
415 }
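/*
 * pvo_pte.slot computed above is the index of the first PTE in the primary
 * PTE group for this VPN: the masked hash selects a PTEG, and each PTEG
 * holds eight PTEs, hence the shift by 3.
 */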
416 
417 static void
418 free_pvo_entry(struct pvo_entry *pvo)
419 {
420 
421 	if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
422 		uma_zfree(moea64_pvo_zone, pvo);
423 }
424 
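/*
 * Build the two 64-bit words of a hashed page table entry (pte_hi/pte_lo)
 * from the software PVO state.  LPTE_BW selects a read/write and LPTE_BR a
 * read-only page-protection encoding; the WIMG attributes already live in
 * pvo_pte.pa and are carried into pte_lo unchanged.
 */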
425 void
426 moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte)
427 {
428 
429 	lpte->pte_hi = (pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) &
430 	    LPTE_AVPN_MASK;
431 	lpte->pte_hi |= LPTE_VALID;
432 
433 	if (pvo->pvo_vaddr & PVO_LARGE)
434 		lpte->pte_hi |= LPTE_BIG;
435 	if (pvo->pvo_vaddr & PVO_WIRED)
436 		lpte->pte_hi |= LPTE_WIRED;
437 	if (pvo->pvo_vaddr & PVO_HID)
438 		lpte->pte_hi |= LPTE_HID;
439 
440 	lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */
441 	if (pvo->pvo_pte.prot & VM_PROT_WRITE)
442 		lpte->pte_lo |= LPTE_BW;
443 	else
444 		lpte->pte_lo |= LPTE_BR;
445 
446 	if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE))
447 		lpte->pte_lo |= LPTE_NOEXEC;
448 }
449 
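/*
 * Translate a vm_memattr_t into PowerPC WIMG storage attributes
 * (W = write-through, I = caching inhibited, M = memory coherence required,
 * G = guarded).
 */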
450 static __inline uint64_t
451 moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
452 {
453 	uint64_t pte_lo;
454 	int i;
455 
456 	if (ma != VM_MEMATTR_DEFAULT) {
457 		switch (ma) {
458 		case VM_MEMATTR_UNCACHEABLE:
459 			return (LPTE_I | LPTE_G);
460 		case VM_MEMATTR_CACHEABLE:
461 			return (LPTE_M);
462 		case VM_MEMATTR_WRITE_COMBINING:
463 		case VM_MEMATTR_WRITE_BACK:
464 		case VM_MEMATTR_PREFETCHABLE:
465 			return (LPTE_I);
466 		case VM_MEMATTR_WRITE_THROUGH:
467 			return (LPTE_W | LPTE_M);
468 		}
469 	}
470 
471 	/*
472 	 * Assume the page is cache inhibited and access is guarded unless
473 	 * it's in our available memory array.
474 	 */
475 	pte_lo = LPTE_I | LPTE_G;
476 	for (i = 0; i < pregions_sz; i++) {
477 		if ((pa >= pregions[i].mr_start) &&
478 		    (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
479 			pte_lo &= ~(LPTE_I | LPTE_G);
480 			pte_lo |= LPTE_M;
481 			break;
482 		}
483 	}
484 
485 	return pte_lo;
486 }
487 
488 /*
489  * Quick sort callout for comparing memory regions.
490  */
491 static int	om_cmp(const void *a, const void *b);
492 
493 static int
494 om_cmp(const void *a, const void *b)
495 {
496 	const struct	ofw_map *mapa;
497 	const struct	ofw_map *mapb;
498 
499 	mapa = a;
500 	mapb = b;
501 	if (mapa->om_pa < mapb->om_pa)
502 		return (-1);
503 	else if (mapa->om_pa > mapb->om_pa)
504 		return (1);
505 	else
506 		return (0);
507 }
508 
509 static void
510 moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz)
511 {
512 	struct ofw_map	translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */
513 	pcell_t		acells, trans_cells[sz/sizeof(cell_t)];
514 	struct pvo_entry *pvo;
515 	register_t	msr;
516 	vm_offset_t	off;
517 	vm_paddr_t	pa_base;
518 	int		i, j;
519 
520 	bzero(translations, sz);
521 	OF_getencprop(OF_finddevice("/"), "#address-cells", &acells,
522 	    sizeof(acells));
523 	if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1)
524 		panic("moea64_bootstrap: can't get ofw translations");
525 
526 	CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations");
527 	sz /= sizeof(cell_t);
528 	for (i = 0, j = 0; i < sz; j++) {
529 		translations[j].om_va = trans_cells[i++];
530 		translations[j].om_len = trans_cells[i++];
531 		translations[j].om_pa = trans_cells[i++];
532 		if (acells == 2) {
533 			translations[j].om_pa <<= 32;
534 			translations[j].om_pa |= trans_cells[i++];
535 		}
536 		translations[j].om_mode = trans_cells[i++];
537 	}
538 	KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)",
539 	    i, sz));
540 
541 	sz = j;
542 	qsort(translations, sz, sizeof (*translations), om_cmp);
543 
544 	for (i = 0; i < sz; i++) {
545 		pa_base = translations[i].om_pa;
546 	      #ifndef __powerpc64__
547 		if ((translations[i].om_pa >> 32) != 0)
548 			panic("OFW translations above 32-bit boundary!");
549 	      #endif
550 
551 		if (pa_base % PAGE_SIZE)
552 			panic("OFW translation not page-aligned (phys)!");
553 		if (translations[i].om_va % PAGE_SIZE)
554 			panic("OFW translation not page-aligned (virt)!");
555 
556 		CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x",
557 		    pa_base, translations[i].om_va, translations[i].om_len);
558 
559 		/* Now enter the pages for this mapping */
560 
561 		DISABLE_TRANS(msr);
562 		for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) {
563 			/* If this address is direct-mapped, skip remapping */
564 			if (hw_direct_map &&
565 			    translations[i].om_va == PHYS_TO_DMAP(pa_base) &&
566 			    moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT)
567  			    == LPTE_M)
568 				continue;
569 
570 			PMAP_LOCK(kernel_pmap);
571 			pvo = moea64_pvo_find_va(kernel_pmap,
572 			    translations[i].om_va + off);
573 			PMAP_UNLOCK(kernel_pmap);
574 			if (pvo != NULL)
575 				continue;
576 
577 			moea64_kenter(mmup, translations[i].om_va + off,
578 			    pa_base + off);
579 		}
580 		ENABLE_TRANS(msr);
581 	}
582 }
583 
584 #ifdef __powerpc64__
585 static void
586 moea64_probe_large_page(void)
587 {
588 	uint16_t pvr = mfpvr() >> 16;
589 
590 	switch (pvr) {
591 	case IBM970:
592 	case IBM970FX:
593 	case IBM970MP:
594 		powerpc_sync(); isync();
595 		mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG);
596 		powerpc_sync(); isync();
597 
598 		/* FALLTHROUGH */
599 	default:
600 		if (moea64_large_page_size == 0) {
601 			moea64_large_page_size = 0x1000000; /* 16 MB */
602 			moea64_large_page_shift = 24;
603 		}
604 	}
605 
606 	moea64_large_page_mask = moea64_large_page_size - 1;
607 }
608 
609 static void
610 moea64_bootstrap_slb_prefault(vm_offset_t va, int large)
611 {
612 	struct slb *cache;
613 	struct slb entry;
614 	uint64_t esid, slbe;
615 	uint64_t i;
616 
617 	cache = PCPU_GET(aim.slb);
618 	esid = va >> ADDR_SR_SHFT;
619 	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
620 
621 	for (i = 0; i < 64; i++) {
622 		if (cache[i].slbe == (slbe | i))
623 			return;
624 	}
625 
626 	entry.slbe = slbe;
627 	entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT;
628 	if (large)
629 		entry.slbv |= SLBV_L;
630 
631 	slb_insert_kernel(entry.slbe, entry.slbv);
632 }
633 #endif
634 
635 static void
636 moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
637     vm_offset_t kernelend)
638 {
639 	struct pvo_entry *pvo;
640 	register_t msr;
641 	vm_paddr_t pa, pkernelstart, pkernelend;
642 	vm_offset_t size, off;
643 	uint64_t pte_lo;
644 	int i;
645 
646 	if (moea64_large_page_size == 0)
647 		hw_direct_map = 0;
648 
649 	DISABLE_TRANS(msr);
650 	if (hw_direct_map) {
651 		PMAP_LOCK(kernel_pmap);
652 		for (i = 0; i < pregions_sz; i++) {
653 		  for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
654 		     pregions[i].mr_size; pa += moea64_large_page_size) {
655 			pte_lo = LPTE_M;
656 
657 			pvo = alloc_pvo_entry(1 /* bootstrap */);
658 			pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE;
659 			init_pvo_entry(pvo, kernel_pmap, PHYS_TO_DMAP(pa));
660 
661 			/*
662 			 * Set memory access as guarded if a prefetch within
663 			 * the page could run past the end of the available physmem area.
664 			 */
665 			if (pa & moea64_large_page_mask) {
666 				pa &= moea64_large_page_mask;
667 				pte_lo |= LPTE_G;
668 			}
669 			if (pa + moea64_large_page_size >
670 			    pregions[i].mr_start + pregions[i].mr_size)
671 				pte_lo |= LPTE_G;
672 
673 			pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE |
674 			    VM_PROT_EXECUTE;
675 			pvo->pvo_pte.pa = pa | pte_lo;
676 			moea64_pvo_enter(mmup, pvo, NULL);
677 		  }
678 		}
679 		PMAP_UNLOCK(kernel_pmap);
680 	}
681 
682 	/*
683 	 * Make sure the kernel and BPVO pool stay mapped on systems either
684 	 * without a direct map or on which the kernel is not already executing
685 	 * out of the direct-mapped region.
686 	 */
687 
688 	if (!hw_direct_map || kernelstart < DMAP_BASE_ADDRESS) {
689 		pkernelstart = kernelstart & ~DMAP_BASE_ADDRESS;
690 		pkernelend = kernelend & ~DMAP_BASE_ADDRESS;
691 		for (pa = pkernelstart & ~PAGE_MASK; pa < pkernelend;
692 		    pa += PAGE_SIZE)
693 			moea64_kenter(mmup, pa | DMAP_BASE_ADDRESS, pa);
694 	}
695 
696 	if (!hw_direct_map) {
697 		size = moea64_bpvo_pool_size*sizeof(struct pvo_entry);
698 		off = (vm_offset_t)(moea64_bpvo_pool);
699 		for (pa = off; pa < off + size; pa += PAGE_SIZE)
700 			moea64_kenter(mmup, pa, pa);
701 
702 		/* Map exception vectors */
703 		for (pa = EXC_RSVD; pa < EXC_LAST; pa += PAGE_SIZE)
704 			moea64_kenter(mmup, pa | DMAP_BASE_ADDRESS, pa);
705 	}
706 	ENABLE_TRANS(msr);
707 
708 	/*
709 	 * Allow user to override unmapped_buf_allowed for testing.
710 	 * XXXKIB Only direct map implementation was tested.
711 	 */
712 	if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed",
713 	    &unmapped_buf_allowed))
714 		unmapped_buf_allowed = hw_direct_map;
715 }
716 
717 /* Quick sort callout for comparing physical addresses. */
718 static int
719 pa_cmp(const void *a, const void *b)
720 {
721 	const vm_paddr_t *pa = a, *pb = b;
722 
723 	if (*pa < *pb)
724 		return (-1);
725 	else if (*pa > *pb)
726 		return (1);
727 	else
728 		return (0);
729 }
730 
731 void
732 moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
733 {
734 	int		i, j;
735 	vm_size_t	physsz, hwphyssz;
736 	vm_paddr_t	kernelphysstart, kernelphysend;
737 	int		rm_pavail;
738 
739 #ifndef __powerpc64__
740 	/* We don't have a direct map since there is no BAT */
741 	hw_direct_map = 0;
742 
743 	/* Make sure battable is zero, since we have no BAT */
744 	for (i = 0; i < 16; i++) {
745 		battable[i].batu = 0;
746 		battable[i].batl = 0;
747 	}
748 #else
749 	moea64_probe_large_page();
750 
751 	/* Use a direct map if we have large page support */
752 	if (moea64_large_page_size > 0)
753 		hw_direct_map = 1;
754 	else
755 		hw_direct_map = 0;
756 
757 	/* Install trap handlers for SLBs */
758 	bcopy(&slbtrap, (void *)EXC_DSE,(size_t)&slbtrapend - (size_t)&slbtrap);
759 	bcopy(&slbtrap, (void *)EXC_ISE,(size_t)&slbtrapend - (size_t)&slbtrap);
760 	__syncicache((void *)EXC_DSE, 0x80);
761 	__syncicache((void *)EXC_ISE, 0x80);
762 #endif
763 
764 	kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
765 	kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
766 
767 	/* Get physical memory regions from firmware */
768 	mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
769 	CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
770 
771 	if (nitems(phys_avail) < regions_sz)
772 		panic("moea64_bootstrap: phys_avail too small");
773 
774 	phys_avail_count = 0;
775 	physsz = 0;
776 	hwphyssz = 0;
777 	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
778 	for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
779 		CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)",
780 		    regions[i].mr_start, regions[i].mr_start +
781 		    regions[i].mr_size, regions[i].mr_size);
782 		if (hwphyssz != 0 &&
783 		    (physsz + regions[i].mr_size) >= hwphyssz) {
784 			if (physsz < hwphyssz) {
785 				phys_avail[j] = regions[i].mr_start;
786 				phys_avail[j + 1] = regions[i].mr_start +
787 				    hwphyssz - physsz;
788 				physsz = hwphyssz;
789 				phys_avail_count++;
790 			}
791 			break;
792 		}
793 		phys_avail[j] = regions[i].mr_start;
794 		phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
795 		phys_avail_count++;
796 		physsz += regions[i].mr_size;
797 	}
798 
799 	/* Check for overlap with the kernel and exception vectors */
800 	rm_pavail = 0;
801 	for (j = 0; j < 2*phys_avail_count; j+=2) {
802 		if (phys_avail[j] < EXC_LAST)
803 			phys_avail[j] += EXC_LAST;
804 
805 		if (phys_avail[j] >= kernelphysstart &&
806 		    phys_avail[j+1] <= kernelphysend) {
807 			phys_avail[j] = phys_avail[j+1] = ~0;
808 			rm_pavail++;
809 			continue;
810 		}
811 
812 		if (kernelphysstart >= phys_avail[j] &&
813 		    kernelphysstart < phys_avail[j+1]) {
814 			if (kernelphysend < phys_avail[j+1]) {
815 				phys_avail[2*phys_avail_count] =
816 				    (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
817 				phys_avail[2*phys_avail_count + 1] =
818 				    phys_avail[j+1];
819 				phys_avail_count++;
820 			}
821 
822 			phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
823 		}
824 
825 		if (kernelphysend >= phys_avail[j] &&
826 		    kernelphysend < phys_avail[j+1]) {
827 			if (kernelphysstart > phys_avail[j]) {
828 				phys_avail[2*phys_avail_count] = phys_avail[j];
829 				phys_avail[2*phys_avail_count + 1] =
830 				    kernelphysstart & ~PAGE_MASK;
831 				phys_avail_count++;
832 			}
833 
834 			phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
835 			    PAGE_SIZE;
836 		}
837 	}
838 
839 	/* Remove physical available regions marked for removal (~0) */
840 	if (rm_pavail) {
841 		qsort(phys_avail, 2*phys_avail_count, sizeof(phys_avail[0]),
842 			pa_cmp);
843 		phys_avail_count -= rm_pavail;
844 		for (i = 2*phys_avail_count;
845 		     i < 2*(phys_avail_count + rm_pavail); i+=2)
846 			phys_avail[i] = phys_avail[i+1] = 0;
847 	}
848 
849 	physmem = btoc(physsz);
850 
851 #ifdef PTEGCOUNT
852 	moea64_pteg_count = PTEGCOUNT;
853 #else
854 	moea64_pteg_count = 0x1000;
855 
856 	while (moea64_pteg_count < physmem)
857 		moea64_pteg_count <<= 1;
858 
859 	moea64_pteg_count >>= 1;
860 #endif /* PTEGCOUNT */
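	/*
	 * When PTEGCOUNT is not configured, the loop above leaves
	 * moea64_pteg_count at roughly half of physmem (in pages), rounded
	 * to a power of two: e.g. 4 GB of RAM with 4 KB pages
	 * (physmem == 0x100000) gives 0x80000 PTE groups.
	 */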
861 }
862 
863 void
864 moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
865 {
866 	int		i;
867 
868 	/*
869 	 * Set PTEG mask
870 	 */
871 	moea64_pteg_mask = moea64_pteg_count - 1;
872 
873 	/*
874 	 * Initialize SLB table lock and page locks
875 	 */
876 	mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF);
877 	for (i = 0; i < PV_LOCK_COUNT; i++)
878 		mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF);
879 
880 	/*
881 	 * Initialise the bootstrap pvo pool.
882 	 */
883 	moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc(
884 		moea64_bpvo_pool_size*sizeof(struct pvo_entry), PAGE_SIZE);
885 	moea64_bpvo_pool_index = 0;
886 
887 	/* Place at address usable through the direct map */
888 	if (hw_direct_map)
889 		moea64_bpvo_pool = (struct pvo_entry *)
890 		    PHYS_TO_DMAP((uintptr_t)moea64_bpvo_pool);
891 
892 	/*
893 	 * Make sure kernel vsid is allocated as well as VSID 0.
894 	 */
895 	#ifndef __powerpc64__
896 	moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW]
897 		|= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
898 	moea64_vsid_bitmap[0] |= 1;
899 	#endif
900 
901 	/*
902 	 * Initialize the kernel pmap (which is statically allocated).
903 	 */
904 	#ifdef __powerpc64__
905 	for (i = 0; i < 64; i++) {
906 		pcpup->pc_aim.slb[i].slbv = 0;
907 		pcpup->pc_aim.slb[i].slbe = 0;
908 	}
909 	#else
910 	for (i = 0; i < 16; i++)
911 		kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
912 	#endif
913 
914 	kernel_pmap->pmap_phys = kernel_pmap;
915 	CPU_FILL(&kernel_pmap->pm_active);
916 	RB_INIT(&kernel_pmap->pmap_pvo);
917 
918 	PMAP_LOCK_INIT(kernel_pmap);
919 
920 	/*
921 	 * Now map in all the other buffers we allocated earlier
922 	 */
923 
924 	moea64_setup_direct_map(mmup, kernelstart, kernelend);
925 }
926 
927 void
928 moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
929 {
930 	ihandle_t	mmui;
931 	phandle_t	chosen;
932 	phandle_t	mmu;
933 	ssize_t		sz;
934 	int		i;
935 	vm_offset_t	pa, va;
936 	void		*dpcpu;
937 
938 	/*
939 	 * Set up the Open Firmware pmap and add its mappings if not in real
940 	 * mode.
941 	 */
942 
943 	chosen = OF_finddevice("/chosen");
944 	if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) {
945 		mmu = OF_instance_to_package(mmui);
946 		if (mmu == -1 ||
947 		    (sz = OF_getproplen(mmu, "translations")) == -1)
948 			sz = 0;
949 		if (sz > 6144 /* tmpstksz - 2 KB headroom */)
950 			panic("moea64_bootstrap: too many ofw translations");
951 
952 		if (sz > 0)
953 			moea64_add_ofw_mappings(mmup, mmu, sz);
954 	}
955 
956 	/*
957 	 * Calculate the last available physical address.
958 	 */
959 	Maxmem = 0;
960 	for (i = 0; phys_avail[i + 2] != 0; i += 2)
961 		Maxmem = MAX(Maxmem, powerpc_btop(phys_avail[i + 1]));
962 
963 	/*
964 	 * Initialize MMU.
965 	 */
966 	MMU_CPU_BOOTSTRAP(mmup,0);
967 	mtmsr(mfmsr() | PSL_DR | PSL_IR);
968 	pmap_bootstrapped++;
969 
970 	/*
971 	 * Set the start and end of kva.
972 	 */
973 	virtual_avail = VM_MIN_KERNEL_ADDRESS;
974 	virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS;
975 
976 	/*
977 	 * Map the entire KVA range into the SLB. We must not fault there.
978 	 */
979 	#ifdef __powerpc64__
980 	for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
981 		moea64_bootstrap_slb_prefault(va, 0);
982 	#endif
983 
984 	/*
985 	 * Remap any early IO mappings (console framebuffer, etc.)
986 	 */
987 	bs_remap_earlyboot();
988 
989 	/*
990 	 * Figure out how far we can extend virtual_end into segment 16
991 	 * without running into existing mappings. Segment 16 is guaranteed
992 	 * to contain neither RAM nor devices (at least on Apple hardware),
993 	 * but will generally contain some OFW mappings we should not
994 	 * step on.
995 	 */
996 
997 	#ifndef __powerpc64__	/* KVA is in high memory on PPC64 */
998 	PMAP_LOCK(kernel_pmap);
999 	while (virtual_end < VM_MAX_KERNEL_ADDRESS &&
1000 	    moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL)
1001 		virtual_end += PAGE_SIZE;
1002 	PMAP_UNLOCK(kernel_pmap);
1003 	#endif
1004 
1005 	/*
1006 	 * Allocate a kernel stack with a guard page for thread0 and map it
1007 	 * into the kernel page map.
1008 	 */
1009 	pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE);
1010 	va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
1011 	virtual_avail = va + kstack_pages * PAGE_SIZE;
1012 	CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va);
1013 	thread0.td_kstack = va;
1014 	thread0.td_kstack_pages = kstack_pages;
1015 	for (i = 0; i < kstack_pages; i++) {
1016 		moea64_kenter(mmup, va, pa);
1017 		pa += PAGE_SIZE;
1018 		va += PAGE_SIZE;
1019 	}
1020 
1021 	/*
1022 	 * Allocate virtual address space for the message buffer.
1023 	 */
1024 	pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE);
1025 	msgbufp = (struct msgbuf *)virtual_avail;
1026 	va = virtual_avail;
1027 	virtual_avail += round_page(msgbufsize);
1028 	while (va < virtual_avail) {
1029 		moea64_kenter(mmup, va, pa);
1030 		pa += PAGE_SIZE;
1031 		va += PAGE_SIZE;
1032 	}
1033 
1034 	/*
1035 	 * Allocate virtual address space for the dynamic percpu area.
1036 	 */
1037 	pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
1038 	dpcpu = (void *)virtual_avail;
1039 	va = virtual_avail;
1040 	virtual_avail += DPCPU_SIZE;
1041 	while (va < virtual_avail) {
1042 		moea64_kenter(mmup, va, pa);
1043 		pa += PAGE_SIZE;
1044 		va += PAGE_SIZE;
1045 	}
1046 	dpcpu_init(dpcpu, curcpu);
1047 
1048 	/*
1049 	 * Allocate some things for page zeroing. We put this directly
1050 	 * in the page table and use MOEA64_PTE_REPLACE to keep the PVO
1051 	 * book-keeping and other parts of the VM system from even
1052 	 * knowing that this hack exists.
1053 	 */
1054 
1055 	if (!hw_direct_map) {
1056 		mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL,
1057 		    MTX_DEF);
1058 		for (i = 0; i < 2; i++) {
1059 			moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE;
1060 			virtual_end -= PAGE_SIZE;
1061 
1062 			moea64_kenter(mmup, moea64_scratchpage_va[i], 0);
1063 
1064 			PMAP_LOCK(kernel_pmap);
1065 			moea64_scratchpage_pvo[i] = moea64_pvo_find_va(
1066 			    kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]);
1067 			PMAP_UNLOCK(kernel_pmap);
1068 		}
1069 	}
1070 
1071 	numa_mem_regions(&numa_pregions, &numapregions_sz);
1072 }
1073 
1074 static void
1075 moea64_pmap_init_qpages(void)
1076 {
1077 	struct pcpu *pc;
1078 	int i;
1079 
1080 	if (hw_direct_map)
1081 		return;
1082 
1083 	CPU_FOREACH(i) {
1084 		pc = pcpu_find(i);
1085 		pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
1086 		if (pc->pc_qmap_addr == 0)
1087 			panic("pmap_init_qpages: unable to allocate KVA");
1088 		PMAP_LOCK(kernel_pmap);
1089 		pc->pc_aim.qmap_pvo =
1090 		    moea64_pvo_find_va(kernel_pmap, pc->pc_qmap_addr);
1091 		PMAP_UNLOCK(kernel_pmap);
1092 		mtx_init(&pc->pc_aim.qmap_lock, "qmap lock", NULL, MTX_DEF);
1093 	}
1094 }
1095 
1096 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL);
1097 
1098 /*
1099  * Activate a user pmap.  This mostly involves setting some non-CPU
1100  * state.
1101  */
1102 void
1103 moea64_activate(mmu_t mmu, struct thread *td)
1104 {
1105 	pmap_t	pm;
1106 
1107 	pm = &td->td_proc->p_vmspace->vm_pmap;
1108 	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
1109 
1110 	#ifdef __powerpc64__
1111 	PCPU_SET(aim.userslb, pm->pm_slb);
1112 	__asm __volatile("slbmte %0, %1; isync" ::
1113 	    "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
1114 	#else
1115 	PCPU_SET(curpmap, pm->pmap_phys);
1116 	mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid);
1117 	#endif
1118 }
1119 
1120 void
1121 moea64_deactivate(mmu_t mmu, struct thread *td)
1122 {
1123 	pmap_t	pm;
1124 
1125 	__asm __volatile("isync; slbie %0" :: "r"(USER_ADDR));
1126 
1127 	pm = &td->td_proc->p_vmspace->vm_pmap;
1128 	CPU_CLR(PCPU_GET(cpuid), &pm->pm_active);
1129 	#ifdef __powerpc64__
1130 	PCPU_SET(aim.userslb, NULL);
1131 	#else
1132 	PCPU_SET(curpmap, NULL);
1133 	#endif
1134 }
1135 
1136 void
1137 moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1138 {
1139 	struct	pvo_entry key, *pvo;
1140 	vm_page_t m;
1141 	int64_t	refchg;
1142 
1143 	key.pvo_vaddr = sva;
1144 	PMAP_LOCK(pm);
1145 	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
1146 	    pvo != NULL && PVO_VADDR(pvo) < eva;
1147 	    pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
1148 		if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
1149 			panic("moea64_unwire: pvo %p is missing PVO_WIRED",
1150 			    pvo);
1151 		pvo->pvo_vaddr &= ~PVO_WIRED;
1152 		refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */);
1153 		if ((pvo->pvo_vaddr & PVO_MANAGED) &&
1154 		    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
1155 			if (refchg < 0)
1156 				refchg = LPTE_CHG;
1157 			m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
1158 
1159 			refchg |= atomic_readandclear_32(&m->md.mdpg_attrs);
1160 			if (refchg & LPTE_CHG)
1161 				vm_page_dirty(m);
1162 			if (refchg & LPTE_REF)
1163 				vm_page_aflag_set(m, PGA_REFERENCED);
1164 		}
1165 		pm->pm_stats.wired_count--;
1166 	}
1167 	PMAP_UNLOCK(pm);
1168 }
1169 
1170 /*
1171  * This goes through and sets the physical address of our
1172  * special scratch PTE to the PA we want to zero or copy. Because
1173  * of locking issues (this can get called in pvo_enter() by
1174  * the UMA allocator), we can't use most other utility functions here
1175  */
1176 
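/*
 * A minimal usage sketch (this is what moea64_zero_page() and
 * moea64_copy_page() below do when there is no direct map):
 *
 *	mtx_lock(&moea64_scratchpage_mtx);
 *	moea64_set_scratchpage_pa(mmu, 0, pa);
 *	... access the frame through moea64_scratchpage_va[0] ...
 *	mtx_unlock(&moea64_scratchpage_mtx);
 */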
1177 static __inline
1178 void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_paddr_t pa)
1179 {
1180 	struct pvo_entry *pvo;
1181 
1182 	KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!"));
1183 	mtx_assert(&moea64_scratchpage_mtx, MA_OWNED);
1184 
1185 	pvo = moea64_scratchpage_pvo[which];
1186 	PMAP_LOCK(pvo->pvo_pmap);
1187 	pvo->pvo_pte.pa =
1188 	    moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa;
1189 	MOEA64_PTE_REPLACE(mmup, pvo, MOEA64_PTE_INVALIDATE);
1190 	PMAP_UNLOCK(pvo->pvo_pmap);
1191 	isync();
1192 }
1193 
1194 void
1195 moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst)
1196 {
1197 	vm_offset_t	dst;
1198 	vm_offset_t	src;
1199 
1200 	dst = VM_PAGE_TO_PHYS(mdst);
1201 	src = VM_PAGE_TO_PHYS(msrc);
1202 
1203 	if (hw_direct_map) {
1204 		bcopy((void *)PHYS_TO_DMAP(src), (void *)PHYS_TO_DMAP(dst),
1205 		    PAGE_SIZE);
1206 	} else {
1207 		mtx_lock(&moea64_scratchpage_mtx);
1208 
1209 		moea64_set_scratchpage_pa(mmu, 0, src);
1210 		moea64_set_scratchpage_pa(mmu, 1, dst);
1211 
1212 		bcopy((void *)moea64_scratchpage_va[0],
1213 		    (void *)moea64_scratchpage_va[1], PAGE_SIZE);
1214 
1215 		mtx_unlock(&moea64_scratchpage_mtx);
1216 	}
1217 }
1218 
1219 static inline void
1220 moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
1221     vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1222 {
1223 	void *a_cp, *b_cp;
1224 	vm_offset_t a_pg_offset, b_pg_offset;
1225 	int cnt;
1226 
1227 	while (xfersize > 0) {
1228 		a_pg_offset = a_offset & PAGE_MASK;
1229 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1230 		a_cp = (char *)(uintptr_t)PHYS_TO_DMAP(
1231 		    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])) +
1232 		    a_pg_offset;
1233 		b_pg_offset = b_offset & PAGE_MASK;
1234 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1235 		b_cp = (char *)(uintptr_t)PHYS_TO_DMAP(
1236 		    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])) +
1237 		    b_pg_offset;
1238 		bcopy(a_cp, b_cp, cnt);
1239 		a_offset += cnt;
1240 		b_offset += cnt;
1241 		xfersize -= cnt;
1242 	}
1243 }
1244 
1245 static inline void
1246 moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
1247     vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1248 {
1249 	void *a_cp, *b_cp;
1250 	vm_offset_t a_pg_offset, b_pg_offset;
1251 	int cnt;
1252 
1253 	mtx_lock(&moea64_scratchpage_mtx);
1254 	while (xfersize > 0) {
1255 		a_pg_offset = a_offset & PAGE_MASK;
1256 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1257 		moea64_set_scratchpage_pa(mmu, 0,
1258 		    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
1259 		a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset;
1260 		b_pg_offset = b_offset & PAGE_MASK;
1261 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1262 		moea64_set_scratchpage_pa(mmu, 1,
1263 		    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
1264 		b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset;
1265 		bcopy(a_cp, b_cp, cnt);
1266 		a_offset += cnt;
1267 		b_offset += cnt;
1268 		xfersize -= cnt;
1269 	}
1270 	mtx_unlock(&moea64_scratchpage_mtx);
1271 }
1272 
1273 void
1274 moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
1275     vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1276 {
1277 
1278 	if (hw_direct_map) {
1279 		moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset,
1280 		    xfersize);
1281 	} else {
1282 		moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset,
1283 		    xfersize);
1284 	}
1285 }
1286 
1287 void
1288 moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size)
1289 {
1290 	vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1291 
1292 	if (size + off > PAGE_SIZE)
1293 		panic("moea64_zero_page_area: size + off > PAGE_SIZE");
1294 
1295 	if (hw_direct_map) {
1296 		bzero((caddr_t)(uintptr_t)PHYS_TO_DMAP(pa) + off, size);
1297 	} else {
1298 		mtx_lock(&moea64_scratchpage_mtx);
1299 		moea64_set_scratchpage_pa(mmu, 0, pa);
1300 		bzero((caddr_t)moea64_scratchpage_va[0] + off, size);
1301 		mtx_unlock(&moea64_scratchpage_mtx);
1302 	}
1303 }
1304 
1305 /*
1306  * Zero a page of physical memory by temporarily mapping it
1307  */
1308 void
1309 moea64_zero_page(mmu_t mmu, vm_page_t m)
1310 {
1311 	vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1312 	vm_offset_t va, off;
1313 
1314 	if (!hw_direct_map) {
1315 		mtx_lock(&moea64_scratchpage_mtx);
1316 
1317 		moea64_set_scratchpage_pa(mmu, 0, pa);
1318 		va = moea64_scratchpage_va[0];
1319 	} else {
1320 		va = PHYS_TO_DMAP(pa);
1321 	}
1322 
1323 	for (off = 0; off < PAGE_SIZE; off += cacheline_size)
1324 		__asm __volatile("dcbz 0,%0" :: "r"(va + off));
1325 
1326 	if (!hw_direct_map)
1327 		mtx_unlock(&moea64_scratchpage_mtx);
1328 }
1329 
1330 vm_offset_t
1331 moea64_quick_enter_page(mmu_t mmu, vm_page_t m)
1332 {
1333 	struct pvo_entry *pvo;
1334 	vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1335 
1336 	if (hw_direct_map)
1337 		return (PHYS_TO_DMAP(pa));
1338 
1339 	/*
1340  	 * MOEA64_PTE_REPLACE does some locking, so we can't just grab
1341 	 * a critical section and access the PCPU data like on i386.
1342 	 * Instead, pin the thread and grab the PCPU lock to prevent
1343 	 * a preempting thread from using the same PCPU data.
1344 	 */
1345 	sched_pin();
1346 
1347 	mtx_assert(PCPU_PTR(aim.qmap_lock), MA_NOTOWNED);
1348 	pvo = PCPU_GET(aim.qmap_pvo);
1349 
1350 	mtx_lock(PCPU_PTR(aim.qmap_lock));
1351 	pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) |
1352 	    (uint64_t)pa;
1353 	MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_INVALIDATE);
1354 	isync();
1355 
1356 	return (PCPU_GET(qmap_addr));
1357 }
1358 
1359 void
1360 moea64_quick_remove_page(mmu_t mmu, vm_offset_t addr)
1361 {
1362 	if (hw_direct_map)
1363 		return;
1364 
1365 	mtx_assert(PCPU_PTR(aim.qmap_lock), MA_OWNED);
1366 	KASSERT(PCPU_GET(qmap_addr) == addr,
1367 	    ("moea64_quick_remove_page: invalid address"));
1368 	mtx_unlock(PCPU_PTR(aim.qmap_lock));
1369 	sched_unpin();
1370 }
1371 
1372 /*
1373  * Map the given physical page at the specified virtual address in the
1374  * target pmap with the protection requested.  If specified the page
1375  * will be wired down.
1376  */
1377 
1378 int
1379 moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
1380     vm_prot_t prot, u_int flags, int8_t psind)
1381 {
1382 	struct		pvo_entry *pvo, *oldpvo;
1383 	struct		pvo_head *pvo_head;
1384 	uint64_t	pte_lo;
1385 	int		error;
1386 
1387 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1388 		VM_OBJECT_ASSERT_LOCKED(m->object);
1389 
1390 	pvo = alloc_pvo_entry(0);
1391 	pvo->pvo_pmap = NULL; /* to be filled in later */
1392 	pvo->pvo_pte.prot = prot;
1393 
1394 	pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m));
1395 	pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo;
1396 
1397 	if ((flags & PMAP_ENTER_WIRED) != 0)
1398 		pvo->pvo_vaddr |= PVO_WIRED;
1399 
1400 	if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) {
1401 		pvo_head = NULL;
1402 	} else {
1403 		pvo_head = &m->md.mdpg_pvoh;
1404 		pvo->pvo_vaddr |= PVO_MANAGED;
1405 	}
1406 
1407 	for (;;) {
1408 		PV_PAGE_LOCK(m);
1409 		PMAP_LOCK(pmap);
1410 		if (pvo->pvo_pmap == NULL)
1411 			init_pvo_entry(pvo, pmap, va);
1412 		if (prot & VM_PROT_WRITE)
1413 			if (pmap_bootstrapped &&
1414 			    (m->oflags & VPO_UNMANAGED) == 0)
1415 				vm_page_aflag_set(m, PGA_WRITEABLE);
1416 
1417 		oldpvo = moea64_pvo_find_va(pmap, va);
1418 		if (oldpvo != NULL) {
1419 			if (oldpvo->pvo_vaddr == pvo->pvo_vaddr &&
1420 			    oldpvo->pvo_pte.pa == pvo->pvo_pte.pa &&
1421 			    oldpvo->pvo_pte.prot == prot) {
1422 				/* Identical mapping already exists */
1423 				error = 0;
1424 
1425 				/* If not in page table, reinsert it */
1426 				if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) {
1427 					moea64_pte_overflow--;
1428 					MOEA64_PTE_INSERT(mmu, oldpvo);
1429 				}
1430 
1431 				/* Then just clean up and go home */
1432 				PV_PAGE_UNLOCK(m);
1433 				PMAP_UNLOCK(pmap);
1434 				free_pvo_entry(pvo);
1435 				break;
1436 			}
1437 
1438 			/* Otherwise, need to kill it first */
1439 			KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old "
1440 			    "mapping does not match new mapping"));
1441 			moea64_pvo_remove_from_pmap(mmu, oldpvo);
1442 		}
1443 		error = moea64_pvo_enter(mmu, pvo, pvo_head);
1444 		PV_PAGE_UNLOCK(m);
1445 		PMAP_UNLOCK(pmap);
1446 
1447 		/* Free any dead pages */
1448 		if (oldpvo != NULL) {
1449 			PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN);
1450 			moea64_pvo_remove_from_page(mmu, oldpvo);
1451 			PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN);
1452 			free_pvo_entry(oldpvo);
1453 		}
1454 
1455 		if (error != ENOMEM)
1456 			break;
1457 		if ((flags & PMAP_ENTER_NOSLEEP) != 0)
1458 			return (KERN_RESOURCE_SHORTAGE);
1459 		VM_OBJECT_ASSERT_UNLOCKED(m->object);
1460 		vm_wait(NULL);
1461 	}
1462 
1463 	/*
1464 	 * Flush the page from the instruction cache if this page is
1465 	 * mapped executable and cacheable.
1466 	 */
1467 	if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) &&
1468 	    (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
1469 		vm_page_aflag_set(m, PGA_EXECUTABLE);
1470 		moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE);
1471 	}
1472 	return (KERN_SUCCESS);
1473 }
1474 
1475 static void
1476 moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
1477     vm_size_t sz)
1478 {
1479 
1480 	/*
1481 	 * This is much trickier than on older systems because
1482 	 * we can't sync the icache on physical addresses directly
1483 	 * without a direct map. Instead we check a couple of cases
1484 	 * where the memory is already mapped in and, failing that,
1485 	 * use the same trick we use for page zeroing to create
1486 	 * a temporary mapping for this physical address.
1487 	 */
1488 
1489 	if (!pmap_bootstrapped) {
1490 		/*
1491 		 * If PMAP is not bootstrapped, we are likely to be
1492 		 * in real mode.
1493 		 */
1494 		__syncicache((void *)(uintptr_t)pa, sz);
1495 	} else if (pmap == kernel_pmap) {
1496 		__syncicache((void *)va, sz);
1497 	} else if (hw_direct_map) {
1498 		__syncicache((void *)(uintptr_t)PHYS_TO_DMAP(pa), sz);
1499 	} else {
1500 		/* Use the scratch page to set up a temp mapping */
1501 
1502 		mtx_lock(&moea64_scratchpage_mtx);
1503 
1504 		moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF);
1505 		__syncicache((void *)(moea64_scratchpage_va[1] +
1506 		    (va & ADDR_POFF)), sz);
1507 
1508 		mtx_unlock(&moea64_scratchpage_mtx);
1509 	}
1510 }
1511 
1512 /*
1513  * Maps a sequence of resident pages belonging to the same object.
1514  * The sequence begins with the given page m_start.  This page is
1515  * mapped at the given virtual address start.  Each subsequent page is
1516  * mapped at a virtual address that is offset from start by the same
1517  * amount as the page is offset from m_start within the object.  The
1518  * last page in the sequence is the page with the largest offset from
1519  * m_start that can be mapped at a virtual address less than the given
1520  * virtual address end.  Not every virtual page between start and end
1521  * is mapped; only those for which a resident page exists with the
1522  * corresponding offset from m_start are mapped.
1523  */
1524 void
1525 moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end,
1526     vm_page_t m_start, vm_prot_t prot)
1527 {
1528 	vm_page_t m;
1529 	vm_pindex_t diff, psize;
1530 
1531 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
1532 
1533 	psize = atop(end - start);
1534 	m = m_start;
1535 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
1536 		moea64_enter(mmu, pm, start + ptoa(diff), m, prot &
1537 		    (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0);
1538 		m = TAILQ_NEXT(m, listq);
1539 	}
1540 }
1541 
1542 void
1543 moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m,
1544     vm_prot_t prot)
1545 {
1546 
1547 	moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
1548 	    PMAP_ENTER_NOSLEEP, 0);
1549 }
1550 
1551 vm_paddr_t
1552 moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va)
1553 {
1554 	struct	pvo_entry *pvo;
1555 	vm_paddr_t pa;
1556 
1557 	PMAP_LOCK(pm);
1558 	pvo = moea64_pvo_find_va(pm, va);
1559 	if (pvo == NULL)
1560 		pa = 0;
1561 	else
1562 		pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo));
1563 	PMAP_UNLOCK(pm);
1564 
1565 	return (pa);
1566 }
1567 
1568 /*
1569  * Atomically extract and hold the physical page with the given
1570  * pmap and virtual address pair if that mapping permits the given
1571  * protection.
1572  */
1573 vm_page_t
1574 moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1575 {
1576 	struct	pvo_entry *pvo;
1577 	vm_page_t m;
1578         vm_paddr_t pa;
1579 
1580 	m = NULL;
1581 	pa = 0;
1582 	PMAP_LOCK(pmap);
1583 retry:
1584 	pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1585 	if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) {
1586 		if (vm_page_pa_tryrelock(pmap,
1587 		    pvo->pvo_pte.pa & LPTE_RPGN, &pa))
1588 			goto retry;
1589 		m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
1590 		vm_page_hold(m);
1591 	}
1592 	PA_UNLOCK_COND(pa);
1593 	PMAP_UNLOCK(pmap);
1594 	return (m);
1595 }
1596 
1597 static mmu_t installed_mmu;
1598 
1599 static void *
1600 moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
1601     uint8_t *flags, int wait)
1602 {
1603 	struct pvo_entry *pvo;
1604         vm_offset_t va;
1605         vm_page_t m;
1606         int needed_lock;
1607 
1608 	/*
1609 	 * This entire routine is a horrible hack to avoid bothering kmem
1610 	 * for new KVA addresses. Because this can get called from inside
1611 	 * kmem allocation routines, calling kmem for a new address here
1612 	 * can lead to multiply locking non-recursive mutexes.
1613 	 */
1614 
1615 	*flags = UMA_SLAB_PRIV;
1616 	needed_lock = !PMAP_LOCKED(kernel_pmap);
1617 
1618 	m = vm_page_alloc_domain(NULL, 0, domain,
1619 	    malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1620 	if (m == NULL)
1621 		return (NULL);
1622 
1623 	va = VM_PAGE_TO_PHYS(m);
1624 
1625 	pvo = alloc_pvo_entry(1 /* bootstrap */);
1626 
1627 	pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE;
1628 	pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M;
1629 
1630 	if (needed_lock)
1631 		PMAP_LOCK(kernel_pmap);
1632 
1633 	init_pvo_entry(pvo, kernel_pmap, va);
1634 	pvo->pvo_vaddr |= PVO_WIRED;
1635 
1636 	moea64_pvo_enter(installed_mmu, pvo, NULL);
1637 
1638 	if (needed_lock)
1639 		PMAP_UNLOCK(kernel_pmap);
1640 
1641 	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
1642                 bzero((void *)va, PAGE_SIZE);
1643 
1644 	return (void *)va;
1645 }
1646 
1647 extern int elf32_nxstack;
1648 
1649 void
1650 moea64_init(mmu_t mmu)
1651 {
1652 
1653 	CTR0(KTR_PMAP, "moea64_init");
1654 
1655 	moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry),
1656 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1657 	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
1658 
1659 	if (!hw_direct_map) {
1660 		installed_mmu = mmu;
1661 		uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc);
1662 	}
1663 
1664 #ifdef COMPAT_FREEBSD32
1665 	elf32_nxstack = 1;
1666 #endif
1667 
1668 	moea64_initialized = TRUE;
1669 }
1670 
1671 boolean_t
1672 moea64_is_referenced(mmu_t mmu, vm_page_t m)
1673 {
1674 
1675 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1676 	    ("moea64_is_referenced: page %p is not managed", m));
1677 
1678 	return (moea64_query_bit(mmu, m, LPTE_REF));
1679 }
1680 
1681 boolean_t
1682 moea64_is_modified(mmu_t mmu, vm_page_t m)
1683 {
1684 
1685 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1686 	    ("moea64_is_modified: page %p is not managed", m));
1687 
1688 	/*
1689 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
1690 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
1691 	 * is clear, no PTEs can have LPTE_CHG set.
1692 	 */
1693 	VM_OBJECT_ASSERT_LOCKED(m->object);
1694 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
1695 		return (FALSE);
1696 	return (moea64_query_bit(mmu, m, LPTE_CHG));
1697 }
1698 
1699 boolean_t
1700 moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va)
1701 {
1702 	struct pvo_entry *pvo;
1703 	boolean_t rv = TRUE;
1704 
1705 	PMAP_LOCK(pmap);
1706 	pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1707 	if (pvo != NULL)
1708 		rv = FALSE;
1709 	PMAP_UNLOCK(pmap);
1710 	return (rv);
1711 }
1712 
1713 void
1714 moea64_clear_modify(mmu_t mmu, vm_page_t m)
1715 {
1716 
1717 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1718 	    ("moea64_clear_modify: page %p is not managed", m));
1719 	VM_OBJECT_ASSERT_WLOCKED(m->object);
1720 	KASSERT(!vm_page_xbusied(m),
1721 	    ("moea64_clear_modify: page %p is exclusive busied", m));
1722 
1723 	/*
1724 	 * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG
1725 	 * set.  If the object containing the page is locked and the page is
1726 	 * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
1727 	 */
1728 	if ((m->aflags & PGA_WRITEABLE) == 0)
1729 		return;
1730 	moea64_clear_bit(mmu, m, LPTE_CHG);
1731 }
1732 
1733 /*
1734  * Clear the write and modified bits in each of the given page's mappings.
1735  */
1736 void
1737 moea64_remove_write(mmu_t mmu, vm_page_t m)
1738 {
1739 	struct	pvo_entry *pvo;
1740 	int64_t	refchg, ret;
1741 	pmap_t	pmap;
1742 
1743 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1744 	    ("moea64_remove_write: page %p is not managed", m));
1745 
1746 	/*
1747 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
1748 	 * set by another thread while the object is locked.  Thus,
1749 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
1750 	 */
1751 	VM_OBJECT_ASSERT_WLOCKED(m->object);
1752 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
1753 		return;
1754 	powerpc_sync();
1755 	PV_PAGE_LOCK(m);
1756 	refchg = 0;
1757 	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
1758 		pmap = pvo->pvo_pmap;
1759 		PMAP_LOCK(pmap);
1760 		if (!(pvo->pvo_vaddr & PVO_DEAD) &&
1761 		    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
1762 			pvo->pvo_pte.prot &= ~VM_PROT_WRITE;
1763 			ret = MOEA64_PTE_REPLACE(mmu, pvo,
1764 			    MOEA64_PTE_PROT_UPDATE);
1765 			if (ret < 0)
1766 				ret = LPTE_CHG;
1767 			refchg |= ret;
1768 			if (pvo->pvo_pmap == kernel_pmap)
1769 				isync();
1770 		}
1771 		PMAP_UNLOCK(pmap);
1772 	}
1773 	if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG)
1774 		vm_page_dirty(m);
1775 	vm_page_aflag_clear(m, PGA_WRITEABLE);
1776 	PV_PAGE_UNLOCK(m);
1777 }
1778 
1779 /*
1780  *	moea64_ts_referenced:
1781  *
1782  *	Return a count of reference bits for a page, clearing those bits.
1783  *	It is not necessary for every reference bit to be cleared, but it
1784  *	is necessary that 0 only be returned when there are truly no
1785  *	reference bits set.
1786  *
1787  *	XXX: The exact number of bits to check and clear is a matter that
1788  *	should be tested and standardized at some point in the future for
1789  *	optimal aging of shared pages.
1790  */
1791 int
1792 moea64_ts_referenced(mmu_t mmu, vm_page_t m)
1793 {
1794 
1795 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1796 	    ("moea64_ts_referenced: page %p is not managed", m));
1797 	return (moea64_clear_bit(mmu, m, LPTE_REF));
1798 }
1799 
1800 /*
1801  * Modify the WIMG settings of all mappings for a page.
1802  */
1803 void
1804 moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma)
1805 {
1806 	struct	pvo_entry *pvo;
1807 	int64_t	refchg;
1808 	pmap_t	pmap;
1809 	uint64_t lo;
1810 
1811 	if ((m->oflags & VPO_UNMANAGED) != 0) {
1812 		m->md.mdpg_cache_attrs = ma;
1813 		return;
1814 	}
1815 
1816 	lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma);
1817 
1818 	PV_PAGE_LOCK(m);
1819 	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
1820 		pmap = pvo->pvo_pmap;
1821 		PMAP_LOCK(pmap);
1822 		if (!(pvo->pvo_vaddr & PVO_DEAD)) {
1823 			pvo->pvo_pte.pa &= ~LPTE_WIMG;
1824 			pvo->pvo_pte.pa |= lo;
1825 			refchg = MOEA64_PTE_REPLACE(mmu, pvo,
1826 			    MOEA64_PTE_INVALIDATE);
1827 			if (refchg < 0)
1828 				refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ?
1829 				    LPTE_CHG : 0;
1830 			if ((pvo->pvo_vaddr & PVO_MANAGED) &&
1831 			    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
1832 				refchg |=
1833 				    atomic_readandclear_32(&m->md.mdpg_attrs);
1834 				if (refchg & LPTE_CHG)
1835 					vm_page_dirty(m);
1836 				if (refchg & LPTE_REF)
1837 					vm_page_aflag_set(m, PGA_REFERENCED);
1838 			}
1839 			if (pvo->pvo_pmap == kernel_pmap)
1840 				isync();
1841 		}
1842 		PMAP_UNLOCK(pmap);
1843 	}
1844 	m->md.mdpg_cache_attrs = ma;
1845 	PV_PAGE_UNLOCK(m);
1846 }
1847 
1848 /*
1849  * Map a wired page into kernel virtual address space.
1850  */
1851 void
1852 moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
1853 {
1854 	int		error;
1855 	struct pvo_entry *pvo, *oldpvo;
1856 
1857 	pvo = alloc_pvo_entry(0);
1858 	pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
1859 	pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma);
1860 	pvo->pvo_vaddr |= PVO_WIRED;
1861 
1862 	PMAP_LOCK(kernel_pmap);
1863 	oldpvo = moea64_pvo_find_va(kernel_pmap, va);
1864 	if (oldpvo != NULL)
1865 		moea64_pvo_remove_from_pmap(mmu, oldpvo);
1866 	init_pvo_entry(pvo, kernel_pmap, va);
1867 	error = moea64_pvo_enter(mmu, pvo, NULL);
1868 	PMAP_UNLOCK(kernel_pmap);
1869 
1870 	/* Free any dead pages */
1871 	if (oldpvo != NULL) {
1872 		PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN);
1873 		moea64_pvo_remove_from_page(mmu, oldpvo);
1874 		PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN);
1875 		free_pvo_entry(oldpvo);
1876 	}
1877 
1878 	if (error != 0 && error != ENOENT)
1879 		panic("moea64_kenter: failed to enter va %#zx pa %#jx: %d", va,
1880 		    (uintmax_t)pa, error);
1881 }
1882 
1883 void
1884 moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa)
1885 {
1886 
1887 	moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT);
1888 }
1889 
1890 /*
1891  * Extract the physical page address associated with the given kernel virtual
1892  * address.
1893  */
1894 vm_paddr_t
1895 moea64_kextract(mmu_t mmu, vm_offset_t va)
1896 {
1897 	struct		pvo_entry *pvo;
1898 	vm_paddr_t pa;
1899 
1900 	/*
1901 	 * Shortcut the direct-mapped case when applicable.  We never put
1902 	 * anything but 1:1 (or 62-bit aliased) mappings below
1903 	 * VM_MIN_KERNEL_ADDRESS.
1904 	 */
1905 	if (va < VM_MIN_KERNEL_ADDRESS)
1906 		return (va & ~DMAP_BASE_ADDRESS);
1907 
1908 	PMAP_LOCK(kernel_pmap);
1909 	pvo = moea64_pvo_find_va(kernel_pmap, va);
1910 	KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR,
1911 	    va));
1912 	pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo));
1913 	PMAP_UNLOCK(kernel_pmap);
1914 	return (pa);
1915 }
1916 
1917 /*
1918  * Remove a wired page from kernel virtual address space.
1919  */
1920 void
1921 moea64_kremove(mmu_t mmu, vm_offset_t va)
1922 {
1923 	moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE);
1924 }
1925 
1926 /*
1927  * Provide a kernel pointer corresponding to a given userland pointer.
1928  * The returned pointer is valid until the next time this function is
1929  * called in this thread. This is used internally in copyin/copyout.
1930  */
1931 static int
1932 moea64_map_user_ptr(mmu_t mmu, pmap_t pm, volatile const void *uaddr,
1933     void **kaddr, size_t ulen, size_t *klen)
1934 {
1935 	size_t l;
1936 #ifdef __powerpc64__
1937 	struct slb *slb;
1938 #endif
1939 	register_t slbv;
1940 
1941 	*kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK);
1942 	l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr);
1943 	if (l > ulen)
1944 		l = ulen;
1945 	if (klen)
1946 		*klen = l;
1947 	else if (l != ulen)
1948 		return (EFAULT);
1949 
1950 #ifdef __powerpc64__
1951 	/* Try lockless look-up first */
1952 	slb = user_va_to_slb_entry(pm, (vm_offset_t)uaddr);
1953 
1954 	if (slb == NULL) {
1955 		/* If it isn't there, we need to pre-fault the VSID */
1956 		PMAP_LOCK(pm);
1957 		slbv = va_to_vsid(pm, (vm_offset_t)uaddr) << SLBV_VSID_SHIFT;
1958 		PMAP_UNLOCK(pm);
1959 	} else {
1960 		slbv = slb->slbv;
1961 	}
1962 
1963 	/* Mark segment no-execute */
1964 	slbv |= SLBV_N;
1965 #else
1966 	slbv = va_to_vsid(pm, (vm_offset_t)uaddr);
1967 
1968 	/* Mark segment no-execute */
1969 	slbv |= SR_N;
1970 #endif
1971 
1972 	/* If we have already set this VSID, we can just return */
1973 	if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv)
1974 		return (0);
1975 
1976 	__asm __volatile("isync");
1977 	curthread->td_pcb->pcb_cpu.aim.usr_segm =
1978 	    (uintptr_t)uaddr >> ADDR_SR_SHFT;
1979 	curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv;
1980 #ifdef __powerpc64__
1981 	__asm __volatile ("slbie %0; slbmte %1, %2; isync" ::
1982 	    "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE));
1983 #else
1984 	__asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(slbv));
1985 #endif
1986 
1987 	return (0);
1988 }
1989 
1990 /*
1991  * Figure out where a given kernel pointer (usually in a fault) points
1992  * to from the VM's perspective, potentially remapping into userland's
1993  * address space.
1994  */
1995 static int
1996 moea64_decode_kernel_ptr(mmu_t mmu, vm_offset_t addr, int *is_user,
1997     vm_offset_t *decoded_addr)
1998 {
1999 	vm_offset_t user_sr;
2000 
2001 	if ((addr >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) {
2002 		user_sr = curthread->td_pcb->pcb_cpu.aim.usr_segm;
2003 		addr &= ADDR_PIDX | ADDR_POFF;
2004 		addr |= user_sr << ADDR_SR_SHFT;
2005 		*decoded_addr = addr;
2006 		*is_user = 1;
2007 	} else {
2008 		*decoded_addr = addr;
2009 		*is_user = 0;
2010 	}
2011 
2012 	return (0);
2013 }
2014 
2015 /*
2016  * Map a range of physical addresses into kernel virtual address space.
2017  *
2018  * The value passed in *virt is a suggested virtual address for the mapping.
2019  * Architectures which can support a direct-mapped physical to virtual region
2020  * can return the appropriate address within that region, leaving '*virt'
2021  * unchanged.  Other architectures should map the pages starting at '*virt' and
2022  * update '*virt' with the first usable address after the mapped region.
2023  */
2024 vm_offset_t
2025 moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start,
2026     vm_paddr_t pa_end, int prot)
2027 {
2028 	vm_offset_t	sva, va;
2029 
2030 	if (hw_direct_map) {
2031 		/*
2032 		 * Check if every page in the region is covered by the direct
2033 		 * map. The direct map covers all of physical memory. Use
2034 		 * moea64_calc_wimg() as a shortcut: it returns LPTE_M only for
2035 		 * pages in physical memory, i.e. pages the direct map covers.
2036 		 */
2037 		for (va = pa_start; va < pa_end; va += PAGE_SIZE)
2038 			if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M)
2039 				break;
2040 		if (va == pa_end)
2041 			return (PHYS_TO_DMAP(pa_start));
2042 	}
2043 	sva = *virt;
2044 	va = sva;
2045 	/* XXX respect prot argument */
2046 	for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
2047 		moea64_kenter(mmu, va, pa_start);
2048 	*virt = va;
2049 
2050 	return (sva);
2051 }
2052 
2053 /*
2054  * Returns true if the pmap's pv is one of the first
2055  * 16 pvs linked to from this page.  This count may
2056  * be changed upwards or downwards in the future; it
2057  * is only necessary that true be returned for a small
2058  * subset of pmaps for proper page aging.
2059  */
2060 boolean_t
2061 moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
2062 {
2063 	int loops;
2064 	struct pvo_entry *pvo;
2065 	boolean_t rv;
2066 
2067 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2068 	    ("moea64_page_exists_quick: page %p is not managed", m));
2069 	loops = 0;
2070 	rv = FALSE;
2071 	PV_PAGE_LOCK(m);
2072 	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2073 		if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) {
2074 			rv = TRUE;
2075 			break;
2076 		}
2077 		if (++loops >= 16)
2078 			break;
2079 	}
2080 	PV_PAGE_UNLOCK(m);
2081 	return (rv);
2082 }
2083 
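/*
 * Initialize the machine-dependent fields of a newly allocated vm_page.
 */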
2084 void
2085 moea64_page_init(mmu_t mmu __unused, vm_page_t m)
2086 {
2087 
2088 	m->md.mdpg_attrs = 0;
2089 	m->md.mdpg_cache_attrs = VM_MEMATTR_DEFAULT;
2090 	LIST_INIT(&m->md.mdpg_pvoh);
2091 }
2092 
2093 /*
2094  * Return the number of managed mappings to the given physical page
2095  * that are wired.
2096  */
2097 int
2098 moea64_page_wired_mappings(mmu_t mmu, vm_page_t m)
2099 {
2100 	struct pvo_entry *pvo;
2101 	int count;
2102 
2103 	count = 0;
2104 	if ((m->oflags & VPO_UNMANAGED) != 0)
2105 		return (count);
2106 	PV_PAGE_LOCK(m);
2107 	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
2108 		if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED)
2109 			count++;
2110 	PV_PAGE_UNLOCK(m);
2111 	return (count);
2112 }
2113 
2114 static uintptr_t	moea64_vsidcontext;
2115 
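/*
 * Allocate a VSID that is not currently in use, mixing in timebase entropy
 * so that VSIDs spread evenly across the page table hash buckets.
 */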
2116 uintptr_t
2117 moea64_get_unique_vsid(void) {
2118 	u_int entropy;
2119 	register_t hash;
2120 	uint32_t mask;
2121 	int i;
2122 
2123 	entropy = 0;
2124 	__asm __volatile("mftb %0" : "=r"(entropy));
2125 
2126 	mtx_lock(&moea64_slb_mutex);
2127 	for (i = 0; i < NVSIDS; i += VSID_NBPW) {
2128 		u_int	n;
2129 
2130 		/*
2131 		 * Create a new value by multiplying by a prime and adding in
2132 		 * entropy from the timebase register.  This is to make the
2133 		 * VSID more random so that the PT hash function collides
2134 		 * less often.  (Note that the prime causes gcc to do shifts
2135 		 * instead of a multiply.)
2136 		 */
2137 		moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
2138 		hash = moea64_vsidcontext & (NVSIDS - 1);
2139 		if (hash == 0)		/* 0 is special, avoid it */
2140 			continue;
2141 		n = hash >> 5;
2142 		mask = 1 << (hash & (VSID_NBPW - 1));
2143 		hash = (moea64_vsidcontext & VSID_HASHMASK);
2144 		if (moea64_vsid_bitmap[n] & mask) {	/* collision? */
2145 			/* anything free in this bucket? */
2146 			if (moea64_vsid_bitmap[n] == 0xffffffff) {
2147 				entropy = (moea64_vsidcontext >> 20);
2148 				continue;
2149 			}
2150 			i = ffs(~moea64_vsid_bitmap[n]) - 1;
2151 			mask = 1 << i;
2152 			hash &= rounddown2(VSID_HASHMASK, VSID_NBPW);
2153 			hash |= i;
2154 		}
2155 		if (hash == VSID_VRMA)	/* also special, avoid this too */
2156 			continue;
2157 		KASSERT(!(moea64_vsid_bitmap[n] & mask),
2158 		    ("Allocating in-use VSID %#zx\n", hash));
2159 		moea64_vsid_bitmap[n] |= mask;
2160 		mtx_unlock(&moea64_slb_mutex);
2161 		return (hash);
2162 	}
2163 
2164 	mtx_unlock(&moea64_slb_mutex);
2165 	panic("%s: out of segments", __func__);
2166 }
2167 
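/*
 * Initialize a pmap's translation state: an SLB tree and user SLB cache on
 * 64-bit CPUs, or a set of segment register VSIDs on 32-bit CPUs.
 */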
2168 #ifdef __powerpc64__
2169 void
2170 moea64_pinit(mmu_t mmu, pmap_t pmap)
2171 {
2172 
2173 	RB_INIT(&pmap->pmap_pvo);
2174 
2175 	pmap->pm_slb_tree_root = slb_alloc_tree();
2176 	pmap->pm_slb = slb_alloc_user_cache();
2177 	pmap->pm_slb_len = 0;
2178 }
2179 #else
2180 void
2181 moea64_pinit(mmu_t mmu, pmap_t pmap)
2182 {
2183 	int	i;
2184 	uint32_t hash;
2185 
2186 	RB_INIT(&pmap->pmap_pvo);
2187 
2188 	if (pmap_bootstrapped)
2189 		pmap->pmap_phys = (pmap_t)moea64_kextract(mmu,
2190 		    (vm_offset_t)pmap);
2191 	else
2192 		pmap->pmap_phys = pmap;
2193 
2194 	/*
2195 	 * Allocate some segment registers for this pmap.
2196 	 */
2197 	hash = moea64_get_unique_vsid();
2198 
2199 	for (i = 0; i < 16; i++)
2200 		pmap->pm_sr[i] = VSID_MAKE(i, hash);
2201 
2202 	KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0"));
2203 }
2204 #endif
2205 
2206 /*
2207  * Initialize the pmap associated with process 0.
2208  */
2209 void
2210 moea64_pinit0(mmu_t mmu, pmap_t pm)
2211 {
2212 
2213 	PMAP_LOCK_INIT(pm);
2214 	moea64_pinit(mmu, pm);
2215 	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
2216 }
2217 
2218 /*
2219  * Apply a new protection to a single PVO and update its page table entry.
2220  */
2221 static void
2222 moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot)
2223 {
2224 	struct vm_page *pg;
2225 	vm_prot_t oldprot;
2226 	int32_t refchg;
2227 
2228 	PMAP_LOCK_ASSERT(pm, MA_OWNED);
2229 
2230 	/*
2231 	 * Change the protection of the page.
2232 	 */
2233 	oldprot = pvo->pvo_pte.prot;
2234 	pvo->pvo_pte.prot = prot;
2235 	pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
2236 
2237 	/*
2238 	 * If the PVO is in the page table, update mapping
2239 	 */
2240 	refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE);
2241 	if (refchg < 0)
2242 		refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0;
2243 
2244 	if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) &&
2245 	    (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
2246 		if ((pg->oflags & VPO_UNMANAGED) == 0)
2247 			vm_page_aflag_set(pg, PGA_EXECUTABLE);
2248 		moea64_syncicache(mmu, pm, PVO_VADDR(pvo),
2249 		    pvo->pvo_pte.pa & LPTE_RPGN, PAGE_SIZE);
2250 	}
2251 
2252 	/*
2253 	 * Update vm about the REF/CHG bits if the page is managed and we have
2254 	 * removed write access.
2255 	 */
2256 	if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) &&
2257 	    (oldprot & VM_PROT_WRITE)) {
2258 		refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2259 		if (refchg & LPTE_CHG)
2260 			vm_page_dirty(pg);
2261 		if (refchg & LPTE_REF)
2262 			vm_page_aflag_set(pg, PGA_REFERENCED);
2263 	}
2264 }
2265 
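/*
 * Set the physical protection on the specified range of this map as requested.
 */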
2266 void
2267 moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2268     vm_prot_t prot)
2269 {
2270 	struct	pvo_entry *pvo, *tpvo, key;
2271 
2272 	CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
2273 	    sva, eva, prot);
2274 
2275 	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
2276 	    ("moea64_protect: non current pmap"));
2277 
2278 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2279 		moea64_remove(mmu, pm, sva, eva);
2280 		return;
2281 	}
2282 
2283 	PMAP_LOCK(pm);
2284 	key.pvo_vaddr = sva;
2285 	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2286 	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
2287 		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
2288 		moea64_pvo_protect(mmu, pm, pvo, prot);
2289 	}
2290 	PMAP_UNLOCK(pm);
2291 }
2292 
2293 /*
2294  * Map a list of wired pages into kernel virtual address space.  This is
2295  * intended for temporary mappings which do not need page modification or
2296  * references recorded.  Existing mappings in the region are overwritten.
2297  */
2298 void
2299 moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count)
2300 {
2301 	while (count-- > 0) {
2302 		moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
2303 		va += PAGE_SIZE;
2304 		m++;
2305 	}
2306 }
2307 
2308 /*
2309  * Remove page mappings from kernel virtual address space.  Intended for
2310  * temporary mappings entered by moea64_qenter.
2311  */
2312 void
2313 moea64_qremove(mmu_t mmu, vm_offset_t va, int count)
2314 {
2315 	while (count-- > 0) {
2316 		moea64_kremove(mmu, va);
2317 		va += PAGE_SIZE;
2318 	}
2319 }
2320 
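/*
 * Return a VSID to the pool of available VSIDs.
 */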
2321 void
2322 moea64_release_vsid(uint64_t vsid)
2323 {
2324 	int idx, mask;
2325 
2326 	mtx_lock(&moea64_slb_mutex);
2327 	idx = vsid & (NVSIDS-1);
2328 	mask = 1 << (idx % VSID_NBPW);
2329 	idx /= VSID_NBPW;
2330 	KASSERT(moea64_vsid_bitmap[idx] & mask,
2331 	    ("Freeing unallocated VSID %#jx", vsid));
2332 	moea64_vsid_bitmap[idx] &= ~mask;
2333 	mtx_unlock(&moea64_slb_mutex);
2334 }
2335 
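/*
 * Free the segment (VSID) or SLB resources held by a pmap being destroyed.
 */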
2337 void
2338 moea64_release(mmu_t mmu, pmap_t pmap)
2339 {
2340 
2341 	/*
2342 	 * Free segment registers' VSIDs
2343 	 */
2344 #ifdef __powerpc64__
2345 	slb_free_tree(pmap);
2346 	slb_free_user_cache(pmap->pm_slb);
2347 #else
2348 	KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));
2349 
2350 	moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
2351 #endif
2352 }
2353 
2354 /*
2355  * Remove all pages mapped by the specified pmap
2356  */
2357 void
2358 moea64_remove_pages(mmu_t mmu, pmap_t pm)
2359 {
2360 	struct pvo_entry *pvo, *tpvo;
2361 	struct pvo_tree tofree;
2362 
2363 	RB_INIT(&tofree);
2364 
2365 	PMAP_LOCK(pm);
2366 	RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
2367 		if (pvo->pvo_vaddr & PVO_WIRED)
2368 			continue;
2369 
2370 		/*
2371 		 * For locking reasons, remove this from the page table and
2372 		 * pmap, but save delinking from the vm_page for a second
2373 		 * pass
2374 		 */
2375 		moea64_pvo_remove_from_pmap(mmu, pvo);
2376 		RB_INSERT(pvo_tree, &tofree, pvo);
2377 	}
2378 	PMAP_UNLOCK(pm);
2379 
2380 	RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) {
2381 		PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN);
2382 		moea64_pvo_remove_from_page(mmu, pvo);
2383 		PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN);
2384 		RB_REMOVE(pvo_tree, &tofree, pvo);
2385 		free_pvo_entry(pvo);
2386 	}
2387 }
2388 
2389 /*
2390  * Remove the given range of addresses from the specified map.
2391  */
2392 void
2393 moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
2394 {
2395 	struct  pvo_entry *pvo, *tpvo, key;
2396 	struct pvo_tree tofree;
2397 
2398 	/*
2399 	 * Perform an unsynchronized read.  This is, however, safe.
2400 	 */
2401 	if (pm->pm_stats.resident_count == 0)
2402 		return;
2403 
2404 	key.pvo_vaddr = sva;
2405 
2406 	RB_INIT(&tofree);
2407 
2408 	PMAP_LOCK(pm);
2409 	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2410 	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
2411 		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
2412 
2413 		/*
2414 		 * For locking reasons, remove this from the page table and
2415 		 * pmap, but save delinking from the vm_page for a second
2416 		 * pass
2417 		 */
2418 		moea64_pvo_remove_from_pmap(mmu, pvo);
2419 		RB_INSERT(pvo_tree, &tofree, pvo);
2420 	}
2421 	PMAP_UNLOCK(pm);
2422 
2423 	RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) {
2424 		PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN);
2425 		moea64_pvo_remove_from_page(mmu, pvo);
2426 		PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN);
2427 		RB_REMOVE(pvo_tree, &tofree, pvo);
2428 		free_pvo_entry(pvo);
2429 	}
2430 }
2431 
2432 /*
2433  * Remove physical page from all pmaps in which it resides.
2434  * moea64_pvo_remove_from_pmap() reflects PTE changes back to the vm_page.
2435  */
2436 void
2437 moea64_remove_all(mmu_t mmu, vm_page_t m)
2438 {
2439 	struct	pvo_entry *pvo, *next_pvo;
2440 	struct	pvo_head freequeue;
2441 	int	wasdead;
2442 	pmap_t	pmap;
2443 
2444 	LIST_INIT(&freequeue);
2445 
2446 	PV_PAGE_LOCK(m);
2447 	LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) {
2448 		pmap = pvo->pvo_pmap;
2449 		PMAP_LOCK(pmap);
2450 		wasdead = (pvo->pvo_vaddr & PVO_DEAD);
2451 		if (!wasdead)
2452 			moea64_pvo_remove_from_pmap(mmu, pvo);
2453 		moea64_pvo_remove_from_page(mmu, pvo);
2454 		if (!wasdead)
2455 			LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink);
2456 		PMAP_UNLOCK(pmap);
2457 
2458 	}
2459 	KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings"));
2460 	KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable"));
2461 	PV_PAGE_UNLOCK(m);
2462 
2463 	/* Clean up UMA allocations */
2464 	LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo)
2465 		free_pvo_entry(pvo);
2466 }
2467 
2468 /*
2469  * Allocate a physical page of memory directly from the phys_avail map.
2470  * Can only be called from moea64_bootstrap before avail start and end are
2471  * calculated.
2472  */
2473 vm_offset_t
2474 moea64_bootstrap_alloc(vm_size_t size, vm_size_t align)
2475 {
2476 	vm_offset_t	s, e;
2477 	int		i, j;
2478 
2479 	size = round_page(size);
2480 	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
2481 		if (align != 0)
2482 			s = roundup2(phys_avail[i], align);
2483 		else
2484 			s = phys_avail[i];
2485 		e = s + size;
2486 
2487 		if (s < phys_avail[i] || e > phys_avail[i + 1])
2488 			continue;
2489 
2490 		if (s + size > platform_real_maxaddr())
2491 			continue;
2492 
2493 		if (s == phys_avail[i]) {
2494 			phys_avail[i] += size;
2495 		} else if (e == phys_avail[i + 1]) {
2496 			phys_avail[i + 1] -= size;
2497 		} else {
2498 			for (j = phys_avail_count * 2; j > i; j -= 2) {
2499 				phys_avail[j] = phys_avail[j - 2];
2500 				phys_avail[j + 1] = phys_avail[j - 1];
2501 			}
2502 
2503 			phys_avail[i + 3] = phys_avail[i + 1];
2504 			phys_avail[i + 1] = s;
2505 			phys_avail[i + 2] = e;
2506 			phys_avail_count++;
2507 		}
2508 
2509 		return (s);
2510 	}
2511 	panic("moea64_bootstrap_alloc: could not allocate memory");
2512 }
2513 
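/*
 * Insert a PVO into its pmap's tree, the page's PVO list (if any), and the
 * hardware page table.  Returns ENOENT if this PVO is the first entry on the
 * page's list and 0 otherwise.
 */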
2514 static int
2515 moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head)
2516 {
2517 	int first = 0, err;
2518 
2519 	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2520 	KASSERT(moea64_pvo_find_va(pvo->pvo_pmap, PVO_VADDR(pvo)) == NULL,
2521 	    ("Existing mapping for VA %#jx", (uintmax_t)PVO_VADDR(pvo)));
2522 
2523 	moea64_pvo_enter_calls++;
2524 
2525 	/*
2526 	 * Add to pmap list
2527 	 */
2528 	RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2529 
2530 	/*
2531 	 * Remember if the list was empty, in which case this PVO will be
2532 	 * its first item.
2533 	 */
2534 	if (pvo_head != NULL) {
2535 		if (LIST_FIRST(pvo_head) == NULL)
2536 			first = 1;
2537 		LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
2538 	}
2539 
2540 	if (pvo->pvo_vaddr & PVO_WIRED)
2541 		pvo->pvo_pmap->pm_stats.wired_count++;
2542 	pvo->pvo_pmap->pm_stats.resident_count++;
2543 
2544 	/*
2545 	 * Insert it into the hardware page table
2546 	 */
2547 	err = MOEA64_PTE_INSERT(mmu, pvo);
2548 	if (err != 0) {
2549 		panic("moea64_pvo_enter: overflow");
2550 	}
2551 
2552 	moea64_pvo_entries++;
2553 
2554 	if (pvo->pvo_pmap == kernel_pmap)
2555 		isync();
2556 
2557 #ifdef __powerpc64__
2558 	/*
2559 	 * Make sure all our bootstrap mappings are in the SLB as soon
2560 	 * as virtual memory is switched on.
2561 	 */
2562 	if (!pmap_bootstrapped)
2563 		moea64_bootstrap_slb_prefault(PVO_VADDR(pvo),
2564 		    pvo->pvo_vaddr & PVO_LARGE);
2565 #endif
2566 
2567 	return (first ? ENOENT : 0);
2568 }
2569 
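/*
 * Deactivate a PVO: remove it from the hardware page table and its pmap's
 * tree, mark it dead, and push pending REF/CHG bits back to the vm_page.
 */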
2570 static void
2571 moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo)
2572 {
2573 	struct	vm_page *pg;
2574 	int32_t refchg;
2575 
2576 	KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap"));
2577 	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2578 	KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO"));
2579 
2580 	/*
2581 	 * If there is an active pte entry, we need to deactivate it
2582 	 */
2583 	refchg = MOEA64_PTE_UNSET(mmu, pvo);
2584 	if (refchg < 0) {
2585 		/*
2586 		 * If it was evicted from the page table, be pessimistic and
2587 		 * dirty the page.
2588 		 */
2589 		if (pvo->pvo_pte.prot & VM_PROT_WRITE)
2590 			refchg = LPTE_CHG;
2591 		else
2592 			refchg = 0;
2593 	}
2594 
2595 	/*
2596 	 * Update our statistics.
2597 	 */
2598 	pvo->pvo_pmap->pm_stats.resident_count--;
2599 	if (pvo->pvo_vaddr & PVO_WIRED)
2600 		pvo->pvo_pmap->pm_stats.wired_count--;
2601 
2602 	/*
2603 	 * Remove this PVO from the pmap list.
2604 	 */
2605 	RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2606 
2607 	/*
2608 	 * Mark this for the next sweep
2609 	 */
2610 	pvo->pvo_vaddr |= PVO_DEAD;
2611 
2612 	/* Send RC bits to VM */
2613 	if ((pvo->pvo_vaddr & PVO_MANAGED) &&
2614 	    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2615 		pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
2616 		if (pg != NULL) {
2617 			refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2618 			if (refchg & LPTE_CHG)
2619 				vm_page_dirty(pg);
2620 			if (refchg & LPTE_REF)
2621 				vm_page_aflag_set(pg, PGA_REFERENCED);
2622 		}
2623 	}
2624 }
2625 
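/*
 * Second stage of PVO removal: delink a dead PVO from its vm_page and clear
 * the page's writeable/executable flags if this was its last mapping.
 */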
2626 static void
2627 moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo)
2628 {
2629 	struct	vm_page *pg;
2630 
2631 	KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page"));
2632 
2633 	/* Use NULL pmaps as a sentinel for races in page deletion */
2634 	if (pvo->pvo_pmap == NULL)
2635 		return;
2636 	pvo->pvo_pmap = NULL;
2637 
2638 	/*
2639 	 * Update vm about page writeability/executability if managed
2640 	 */
2641 	PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN);
2642 	if (pvo->pvo_vaddr & PVO_MANAGED) {
2643 		pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
2644 
2645 		if (pg != NULL) {
2646 			LIST_REMOVE(pvo, pvo_vlink);
2647 			if (LIST_EMPTY(vm_page_to_pvoh(pg)))
2648 				vm_page_aflag_clear(pg,
2649 				    PGA_WRITEABLE | PGA_EXECUTABLE);
2650 		}
2651 	}
2652 
2653 	moea64_pvo_entries--;
2654 	moea64_pvo_remove_calls++;
2655 }
2656 
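/*
 * Find the PVO for a virtual address in the given pmap, if one exists.
 */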
2657 static struct pvo_entry *
2658 moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
2659 {
2660 	struct pvo_entry key;
2661 
2662 	PMAP_LOCK_ASSERT(pm, MA_OWNED);
2663 
2664 	key.pvo_vaddr = va & ~ADDR_POFF;
2665 	return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key));
2666 }
2667 
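/*
 * Check whether any mapping of the page has the given REF/CHG bit set,
 * caching bits synced from the page table in the vm_page along the way.
 */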
2668 static boolean_t
2669 moea64_query_bit(mmu_t mmu, vm_page_t m, uint64_t ptebit)
2670 {
2671 	struct	pvo_entry *pvo;
2672 	int64_t ret;
2673 	boolean_t rv;
2674 
2675 	/*
2676 	 * See if this bit is stored in the page already.
2677 	 */
2678 	if (m->md.mdpg_attrs & ptebit)
2679 		return (TRUE);
2680 
2681 	/*
2682 	 * Examine each PTE.  Sync so that any pending REF/CHG bits are
2683 	 * flushed to the PTEs.
2684 	 */
2685 	rv = FALSE;
2686 	powerpc_sync();
2687 	PV_PAGE_LOCK(m);
2688 	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2689 		ret = 0;
2690 
2691 		/*
2692 		 * See if this pvo has a valid PTE.  If so, fetch the
2693 		 * REF/CHG bits from the valid PTE.  If the appropriate
2694 		 * ptebit is set, return success.
2695 		 */
2696 		PMAP_LOCK(pvo->pvo_pmap);
2697 		if (!(pvo->pvo_vaddr & PVO_DEAD))
2698 			ret = MOEA64_PTE_SYNCH(mmu, pvo);
2699 		PMAP_UNLOCK(pvo->pvo_pmap);
2700 
2701 		if (ret > 0) {
2702 			atomic_set_32(&m->md.mdpg_attrs,
2703 			    ret & (LPTE_CHG | LPTE_REF));
2704 			if (ret & ptebit) {
2705 				rv = TRUE;
2706 				break;
2707 			}
2708 		}
2709 	}
2710 	PV_PAGE_UNLOCK(m);
2711 
2712 	return (rv);
2713 }
2714 
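/*
 * Clear the given REF/CHG bit in every mapping of the page and return the
 * number of mappings in which the bit was set.
 */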
2715 static u_int
2716 moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit)
2717 {
2718 	u_int	count;
2719 	struct	pvo_entry *pvo;
2720 	int64_t ret;
2721 
2722 	/*
2723 	 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
2724 	 * we can reset the right ones).
2725 	 */
2726 	powerpc_sync();
2727 
2728 	/*
2729 	 * For each pvo entry, clear the pte's ptebit.
2730 	 */
2731 	count = 0;
2732 	PV_PAGE_LOCK(m);
2733 	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2734 		ret = 0;
2735 
2736 		PMAP_LOCK(pvo->pvo_pmap);
2737 		if (!(pvo->pvo_vaddr & PVO_DEAD))
2738 			ret = MOEA64_PTE_CLEAR(mmu, pvo, ptebit);
2739 		PMAP_UNLOCK(pvo->pvo_pmap);
2740 
2741 		if (ret > 0 && (ret & ptebit))
2742 			count++;
2743 	}
2744 	atomic_clear_32(&m->md.mdpg_attrs, ptebit);
2745 	PV_PAGE_UNLOCK(m);
2746 
2747 	return (count);
2748 }
2749 
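/*
 * Return 0 if the given physical range is reachable through the direct map
 * or existing 1:1 kernel mappings, and EFAULT otherwise.
 */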
2750 boolean_t
2751 moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
2752 {
2753 	struct pvo_entry *pvo, key;
2754 	vm_offset_t ppa;
2755 	int error = 0;
2756 
2757 	if (hw_direct_map && mem_valid(pa, size) == 0)
2758 		return (0);
2759 
2760 	PMAP_LOCK(kernel_pmap);
2761 	ppa = pa & ~ADDR_POFF;
2762 	key.pvo_vaddr = DMAP_BASE_ADDRESS + ppa;
2763 	for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key);
2764 	    ppa < pa + size; ppa += PAGE_SIZE,
2765 	    pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) {
2766 		if (pvo == NULL || (pvo->pvo_pte.pa & LPTE_RPGN) != ppa) {
2767 			error = EFAULT;
2768 			break;
2769 		}
2770 	}
2771 	PMAP_UNLOCK(kernel_pmap);
2772 
2773 	return (error);
2774 }
2775 
2776 /*
2777  * Map a set of physical memory pages into the kernel virtual
2778  * address space. Return a pointer to where it is mapped. This
2779  * routine is intended to be used for mapping device memory,
2780  * NOT real memory.
2781  */
2782 void *
2783 moea64_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
2784 {
2785 	vm_offset_t va, tmpva, ppa, offset;
2786 
2787 	ppa = trunc_page(pa);
2788 	offset = pa & PAGE_MASK;
2789 	size = roundup2(offset + size, PAGE_SIZE);
2790 
2791 	va = kva_alloc(size);
2792 
2793 	if (!va)
2794 		panic("moea64_mapdev: Couldn't alloc kernel virtual memory");
2795 
2796 	for (tmpva = va; size > 0;) {
2797 		moea64_kenter_attr(mmu, tmpva, ppa, ma);
2798 		size -= PAGE_SIZE;
2799 		tmpva += PAGE_SIZE;
2800 		ppa += PAGE_SIZE;
2801 	}
2802 
2803 	return ((void *)(va + offset));
2804 }
2805 
2806 void *
2807 moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
2808 {
2809 
2810 	return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT);
2811 }
2812 
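/*
 * Release the kernel virtual address range used by a moea64_mapdev() mapping.
 */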
2813 void
2814 moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
2815 {
2816 	vm_offset_t base, offset;
2817 
2818 	base = trunc_page(va);
2819 	offset = va & PAGE_MASK;
2820 	size = roundup2(offset + size, PAGE_SIZE);
2821 
2822 	kva_free(base, size);
2823 }
2824 
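/*
 * Synchronize the instruction cache with memory, page by page, for a range
 * of addresses in the given pmap.
 */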
2825 void
2826 moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
2827 {
2828 	struct pvo_entry *pvo;
2829 	vm_offset_t lim;
2830 	vm_paddr_t pa;
2831 	vm_size_t len;
2832 
2833 	PMAP_LOCK(pm);
2834 	while (sz > 0) {
2835 		lim = round_page(va+1);
2836 		len = MIN(lim - va, sz);
2837 		pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
2838 		if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) {
2839 			pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va & ADDR_POFF);
2840 			moea64_syncicache(mmu, pm, va, pa, len);
2841 		}
2842 		va += len;
2843 		sz -= len;
2844 	}
2845 	PMAP_UNLOCK(pm);
2846 }
2847 
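/*
 * For dumpsys(): provide a pointer through which the given physical address
 * can be read; here the physical address is returned unchanged.
 */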
2848 void
2849 moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va)
2850 {
2851 
2852 	*va = (void *)(uintptr_t)pa;
2853 }
2854 
2855 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
2856 
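/*
 * Populate dump_map with the physical (full dump) or virtual (minidump)
 * regions that dumpsys() should write.
 */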
2857 void
2858 moea64_scan_init(mmu_t mmu)
2859 {
2860 	struct pvo_entry *pvo;
2861 	vm_offset_t va;
2862 	int i;
2863 
2864 	if (!do_minidump) {
2865 		/* Initialize phys. segments for dumpsys(). */
2866 		memset(&dump_map, 0, sizeof(dump_map));
2867 		mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
2868 		for (i = 0; i < pregions_sz; i++) {
2869 			dump_map[i].pa_start = pregions[i].mr_start;
2870 			dump_map[i].pa_size = pregions[i].mr_size;
2871 		}
2872 		return;
2873 	}
2874 
2875 	/* Virtual segments for minidumps: */
2876 	memset(&dump_map, 0, sizeof(dump_map));
2877 
2878 	/* 1st: kernel .data and .bss. */
2879 	dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
2880 	dump_map[0].pa_size = round_page((uintptr_t)_end) -
2881 	    dump_map[0].pa_start;
2882 
2883 	/* 2nd: msgbuf and tables (see pmap_bootstrap()). */
2884 	dump_map[1].pa_start = (vm_paddr_t)(uintptr_t)msgbufp->msg_ptr;
2885 	dump_map[1].pa_size = round_page(msgbufp->msg_size);
2886 
2887 	/* 3rd: kernel VM. */
2888 	va = dump_map[1].pa_start + dump_map[1].pa_size;
2889 	/* Find start of next chunk (from va). */
2890 	while (va < virtual_end) {
2891 		/* Don't dump the buffer cache. */
2892 		if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
2893 			va = kmi.buffer_eva;
2894 			continue;
2895 		}
2896 		pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
2897 		if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD))
2898 			break;
2899 		va += PAGE_SIZE;
2900 	}
2901 	if (va < virtual_end) {
2902 		dump_map[2].pa_start = va;
2903 		va += PAGE_SIZE;
2904 		/* Find last page in chunk. */
2905 		while (va < virtual_end) {
2906 			/* Don't run into the buffer cache. */
2907 			if (va == kmi.buffer_sva)
2908 				break;
2909 			pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
2910 			if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD))
2911 				break;
2912 			va += PAGE_SIZE;
2913 		}
2914 		dump_map[2].pa_size = va - dump_map[2].pa_start;
2915 	}
2916 }
2917 
2918