1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008-2015 Nathan Whitehorn
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 /*
31 * Manages physical address maps.
32 *
33 * Since the information managed by this module is also stored by the
34 * logical address mapping module, this module may throw away valid virtual
35 * to physical mappings at almost any time. However, invalidations of
36 * mappings must be done as requested.
37 *
38 * In order to cope with hardware architectures which make virtual to
 * physical map invalidates expensive, this module may delay invalidate
 * or reduced protection operations until such time as they are actually
41 * necessary. This module is given full information as to which processors
42 * are currently using which maps, and to when physical maps must be made
43 * correct.
44 */
45
46 #include "opt_kstack_pages.h"
47
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/conf.h>
51 #include <sys/queue.h>
52 #include <sys/cpuset.h>
53 #include <sys/kerneldump.h>
54 #include <sys/ktr.h>
55 #include <sys/lock.h>
56 #include <sys/msgbuf.h>
57 #include <sys/malloc.h>
58 #include <sys/mman.h>
59 #include <sys/mutex.h>
60 #include <sys/proc.h>
61 #include <sys/rwlock.h>
62 #include <sys/sched.h>
63 #include <sys/sysctl.h>
64 #include <sys/systm.h>
65 #include <sys/vmmeter.h>
66 #include <sys/smp.h>
67 #include <sys/reboot.h>
68
69 #include <sys/kdb.h>
70
71 #include <dev/ofw/openfirm.h>
72
73 #include <vm/vm.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_param.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_page.h>
78 #include <vm/vm_phys.h>
79 #include <vm/vm_map.h>
80 #include <vm/vm_object.h>
81 #include <vm/vm_extern.h>
82 #include <vm/vm_pageout.h>
83 #include <vm/vm_dumpset.h>
84 #include <vm/vm_radix.h>
85 #include <vm/vm_reserv.h>
86 #include <vm/uma.h>
87
88 #include <machine/_inttypes.h>
89 #include <machine/cpu.h>
90 #include <machine/ifunc.h>
91 #include <machine/platform.h>
92 #include <machine/frame.h>
93 #include <machine/md_var.h>
94 #include <machine/psl.h>
95 #include <machine/bat.h>
96 #include <machine/hid.h>
97 #include <machine/pte.h>
98 #include <machine/sr.h>
99 #include <machine/trap.h>
100 #include <machine/mmuvar.h>
101
102 #include "mmu_oea64.h"
103
104 void moea64_release_vsid(uint64_t vsid);
105 uintptr_t moea64_get_unique_vsid(void);
106
/*
 * Temporarily turn off data address translation.
 *
 * DISABLE_TRANS() stores the current MSR into the caller-supplied lvalue and
 * then rewrites the MSR with PSL_DR cleared; ENABLE_TRANS() writes the saved
 * value back.  DISABLE_TRANS() is wrapped in do/while so that it expands to a
 * single statement and stays correct as the body of an unbraced if/else.
 */
#define	DISABLE_TRANS(msr)	do {					\
	(msr) = mfmsr();						\
	mtmsr((msr) & ~PSL_DR);						\
} while (0)
#define	ENABLE_TRANS(msr)	mtmsr(msr)
109
/* Combine a segment register number with the low 20 bits of a hash. */
#define	VSID_MAKE(sr, hash)	((sr) | (((hash) & 0xfffff) << 4))
/* Recover the 20-bit hash that VSID_MAKE() placed above bit 4. */
#define	VSID_TO_HASH(vsid)	(((vsid) >> 4) & 0xfffff)
/* VSID bits that take part in the PTEG hash (see init_pvo_entry()). */
#define	VSID_HASH_MASK		0x0000007fffffffffULL
113
114 /*
115 * Locking semantics:
116 *
117 * There are two locks of interest: the page locks and the pmap locks, which
118 * protect their individual PVO lists and are locked in that order. The contents
119 * of all PVO entries are protected by the locks of their respective pmaps.
120 * The pmap of any PVO is guaranteed not to change so long as the PVO is linked
121 * into any list.
122 *
123 */
124
/* The pv lock pool holds MAXCPU reader/writer locks. */
#define PV_LOCK_COUNT	MAXCPU
static struct rwlock __exclusive_cache_line pv_lock[PV_LOCK_COUNT];

/*
 * Physical addresses are reduced to an index at HPT_SP_SHIFT granularity
 * before being mapped onto a lock, so addresses that differ only below that
 * shift always share a lock.
 */
#define PV_LOCK_SHIFT	HPT_SP_SHIFT
#define pa_index(pa)	((pa) >> PV_LOCK_SHIFT)

/*
 * Cheap NUMA-izing of the pv locks, to reduce contention across domains.
 * NUMA domains on POWER9 appear to be indexed as sparse memory spaces, with the
 * index at (N << 45).
 */
#ifdef __powerpc64__
#define PV_LOCK_IDX(pa)	((pa_index(pa) * (((pa) >> 45) + 1)) % PV_LOCK_COUNT)
#else
#define PV_LOCK_IDX(pa)	(pa_index(pa) % PV_LOCK_COUNT)
#endif
/* Lock covering physical address 'pa'. */
#define PV_LOCKPTR(pa)	((struct rwlock *)(&pv_lock[PV_LOCK_IDX(pa)]))

/* Acquire, release and assert the pv lock for a physical address. */
#define PV_WR_LOCK(pa)		rw_wlock(PV_LOCKPTR(pa))
#define PV_RD_LOCK(pa)		rw_rlock(PV_LOCKPTR(pa))
#define PV_UNLOCK(pa)		rw_unlock(PV_LOCKPTR(pa))
#define PV_LOCKASSERT(pa)	rw_assert(PV_LOCKPTR(pa), RA_LOCKED)
#define PV_LOCK_RD_ASSERT(pa)	rw_assert(PV_LOCKPTR(pa), RA_RLOCKED)
#define PV_LOCK_WR_ASSERT(pa)	rw_assert(PV_LOCKPTR(pa), RA_WLOCKED)

/* The same operations keyed by vm_page_t instead of physical address. */
#define PV_PAGE_WR_LOCK(m)	PV_WR_LOCK(VM_PAGE_TO_PHYS(m))
#define PV_PAGE_RD_LOCK(m)	PV_RD_LOCK(VM_PAGE_TO_PHYS(m))
#define PV_PAGE_UNLOCK(m)	PV_UNLOCK(VM_PAGE_TO_PHYS(m))
#define PV_PAGE_LOCKASSERT(m)	PV_LOCKASSERT(VM_PAGE_TO_PHYS(m))
154
/*
 * One decoded record of the Open Firmware "translations" property, as filled
 * in by moea64_add_ofw_mappings().
 */
struct ofw_map {
	cell_t	om_va;		/* virtual address of the mapping */
	cell_t	om_len;		/* length of the mapping in bytes */
	uint64_t om_pa;		/* physical address (one or two cells wide) */
	cell_t	om_mode;	/* mode cell; stored but not used in this file chunk */
};
161
162 extern unsigned char _etext[];
163 extern unsigned char _end[];
164
165 extern void *slbtrap, *slbtrapend;
166
167 /*
168 * Map of physical memory regions.
169 */
170 static struct mem_region *regions;
171 static struct mem_region *pregions;
172 static struct numa_mem_region *numa_pregions;
173 static int regions_sz, pregions_sz, numapregions_sz;
174
175 u_int phys_avail_count;
176
177 extern void bs_remap_earlyboot(void);
178
179 /*
180 * Lock for the SLB tables.
181 */
182 struct mtx moea64_slb_mutex;
183
184 /*
185 * PTEG data.
186 */
187 u_long moea64_pteg_count;
188 u_long moea64_pteg_mask;
189
190 /*
191 * PVO data.
192 */
193
194 uma_zone_t moea64_pvo_zone; /* zone for pvo entries */
195
196 static struct pvo_entry *moea64_bpvo_pool;
197 static int moea64_bpvo_pool_index = 0;
198 static int moea64_bpvo_pool_size = 0;
199 SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD,
200 &moea64_bpvo_pool_index, 0, "");
201
202 #define BPVO_POOL_SIZE 327680 /* Sensible historical default value */
203 #define BPVO_POOL_EXPANSION_FACTOR 3
204 #define VSID_NBPW (sizeof(u_int32_t) * 8)
205 #ifdef __powerpc64__
206 #define NVSIDS (NPMAPS * 16)
207 #define VSID_HASHMASK 0xffffffffUL
208 #else
209 #define NVSIDS NPMAPS
210 #define VSID_HASHMASK 0xfffffUL
211 #endif
212 static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW];
213
214 static bool moea64_initialized = false;
215
216 #ifdef MOEA64_STATS
217 /*
218 * Statistics.
219 */
220 u_int moea64_pte_valid = 0;
221 u_int moea64_pte_overflow = 0;
222 u_int moea64_pvo_entries = 0;
223 u_int moea64_pvo_enter_calls = 0;
224 u_int moea64_pvo_remove_calls = 0;
225 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD,
226 &moea64_pte_valid, 0, "");
227 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD,
228 &moea64_pte_overflow, 0, "");
229 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD,
230 &moea64_pvo_entries, 0, "");
231 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD,
232 &moea64_pvo_enter_calls, 0, "");
233 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD,
234 &moea64_pvo_remove_calls, 0, "");
235 #endif
236
237 vm_offset_t moea64_scratchpage_va[2];
238 struct pvo_entry *moea64_scratchpage_pvo[2];
239 struct mtx moea64_scratchpage_mtx;
240
241 uint64_t moea64_large_page_mask = 0;
242 uint64_t moea64_large_page_size = 0;
243 int moea64_large_page_shift = 0;
244 bool moea64_has_lp_4k_16m = false;
245
246 /*
247 * PVO calls.
248 */
249 static int moea64_pvo_enter(struct pvo_entry *pvo,
250 struct pvo_head *pvo_head, struct pvo_entry **oldpvo);
251 static void moea64_pvo_remove_from_pmap(struct pvo_entry *pvo);
252 static void moea64_pvo_remove_from_page(struct pvo_entry *pvo);
253 static void moea64_pvo_remove_from_page_locked(
254 struct pvo_entry *pvo, vm_page_t m);
255 static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t);
256
257 /*
258 * Utility routines.
259 */
260 static bool moea64_query_bit(vm_page_t, uint64_t);
261 static u_int moea64_clear_bit(vm_page_t, uint64_t);
262 static void moea64_kremove(vm_offset_t);
263 static void moea64_syncicache(pmap_t pmap, vm_offset_t va,
264 vm_paddr_t pa, vm_size_t sz);
265 static void moea64_pmap_init_qpages(void *);
266 static void moea64_remove_locked(pmap_t, vm_offset_t,
267 vm_offset_t, struct pvo_dlist *);
268
269 /*
270 * Superpages data and routines.
271 */
272
273 /*
274 * PVO flags (in vaddr) that must match for promotion to succeed.
275 * Note that protection bits are checked separately, as they reside in
276 * another field.
277 */
278 #define PVO_FLAGS_PROMOTE (PVO_WIRED | PVO_MANAGED | PVO_PTEGIDX_VALID)
279
280 #define PVO_IS_SP(pvo) (((pvo)->pvo_vaddr & PVO_LARGE) && \
281 (pvo)->pvo_pmap != kernel_pmap)
282
283 /* Get physical address from PVO. */
284 #define PVO_PADDR(pvo) moea64_pvo_paddr(pvo)
285
286 /* MD page flag indicating that the page is a superpage. */
287 #define MDPG_ATTR_SP 0x40000000
288
289 SYSCTL_DECL(_vm_pmap);
290
291 static SYSCTL_NODE(_vm_pmap, OID_AUTO, sp, CTLFLAG_RD, 0,
292 "SP page mapping counters");
293
294 static u_long sp_demotions;
295 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, demotions, CTLFLAG_RD,
296 &sp_demotions, 0, "SP page demotions");
297
298 static u_long sp_mappings;
299 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, mappings, CTLFLAG_RD,
300 &sp_mappings, 0, "SP page mappings");
301
302 static u_long sp_p_failures;
303 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_failures, CTLFLAG_RD,
304 &sp_p_failures, 0, "SP page promotion failures");
305
306 static u_long sp_p_fail_pa;
307 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_pa, CTLFLAG_RD,
308 &sp_p_fail_pa, 0, "SP page promotion failure: PAs don't match");
309
310 static u_long sp_p_fail_flags;
311 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_flags, CTLFLAG_RD,
312 &sp_p_fail_flags, 0, "SP page promotion failure: page flags don't match");
313
314 static u_long sp_p_fail_prot;
315 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_prot, CTLFLAG_RD,
316 &sp_p_fail_prot, 0,
317 "SP page promotion failure: page protections don't match");
318
319 static u_long sp_p_fail_wimg;
320 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_wimg, CTLFLAG_RD,
321 &sp_p_fail_wimg, 0, "SP page promotion failure: WIMG bits don't match");
322
323 static u_long sp_promotions;
324 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, promotions, CTLFLAG_RD,
325 &sp_promotions, 0, "SP page promotions");
326
327 static bool moea64_ps_enabled(pmap_t);
328 static void moea64_align_superpage(vm_object_t, vm_ooffset_t,
329 vm_offset_t *, vm_size_t);
330
331 static int moea64_sp_enter(pmap_t pmap, vm_offset_t va,
332 vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind);
333 static struct pvo_entry *moea64_sp_remove(struct pvo_entry *sp,
334 struct pvo_dlist *tofree);
335
336 #if VM_NRESERVLEVEL > 0
337 static void moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m);
338 #endif
339 static void moea64_sp_demote_aligned(struct pvo_entry *sp);
340 static void moea64_sp_demote(struct pvo_entry *pvo);
341
342 static struct pvo_entry *moea64_sp_unwire(struct pvo_entry *sp);
343 static struct pvo_entry *moea64_sp_protect(struct pvo_entry *sp,
344 vm_prot_t prot);
345
346 static int64_t moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit);
347 static int64_t moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m,
348 uint64_t ptebit);
349
350 static __inline bool moea64_sp_pvo_in_range(struct pvo_entry *pvo,
351 vm_offset_t sva, vm_offset_t eva);
352
353 /*
354 * Kernel MMU interface
355 */
356 void moea64_clear_modify(vm_page_t);
357 void moea64_copy_page(vm_page_t, vm_page_t);
358 void moea64_copy_page_dmap(vm_page_t, vm_page_t);
359 void moea64_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
360 vm_page_t *mb, vm_offset_t b_offset, int xfersize);
361 void moea64_copy_pages_dmap(vm_page_t *ma, vm_offset_t a_offset,
362 vm_page_t *mb, vm_offset_t b_offset, int xfersize);
363 int moea64_enter(pmap_t, vm_offset_t, vm_page_t, vm_prot_t,
364 u_int flags, int8_t psind);
365 void moea64_enter_object(pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
366 vm_prot_t);
367 void moea64_enter_quick(pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
368 vm_paddr_t moea64_extract(pmap_t, vm_offset_t);
369 vm_page_t moea64_extract_and_hold(pmap_t, vm_offset_t, vm_prot_t);
370 void moea64_init(void);
371 bool moea64_is_modified(vm_page_t);
372 bool moea64_is_prefaultable(pmap_t, vm_offset_t);
373 bool moea64_is_referenced(vm_page_t);
374 int moea64_ts_referenced(vm_page_t);
375 void *moea64_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, int);
376 bool moea64_page_exists_quick(pmap_t, vm_page_t);
377 void moea64_page_init(vm_page_t);
378 int moea64_page_wired_mappings(vm_page_t);
379 int moea64_pinit(pmap_t);
380 void moea64_pinit0(pmap_t);
381 void moea64_protect(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
382 void moea64_qenter(void *, vm_page_t *, int);
383 void moea64_qremove(void *, int);
384 void moea64_release(pmap_t);
385 void moea64_remove(pmap_t, vm_offset_t, vm_offset_t);
386 void moea64_remove_pages(pmap_t);
387 void moea64_remove_all(vm_page_t);
388 void moea64_remove_write(vm_page_t);
389 void moea64_unwire(pmap_t, vm_offset_t, vm_offset_t);
390 void moea64_zero_page(vm_page_t);
391 void moea64_zero_page_dmap(vm_page_t);
392 void moea64_zero_page_area(vm_page_t, int, int);
393 void moea64_activate(struct thread *);
394 void moea64_deactivate(struct thread *);
395 void *moea64_mapdev(vm_paddr_t, vm_size_t);
396 void *moea64_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t);
397 void moea64_unmapdev(void *, vm_size_t);
398 vm_paddr_t moea64_kextract(vm_offset_t);
399 void moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma);
400 void moea64_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t ma);
401 void moea64_kenter(vm_offset_t, vm_paddr_t);
402 int moea64_dev_direct_mapped(vm_paddr_t, vm_size_t);
403 static void moea64_sync_icache(pmap_t, vm_offset_t, vm_size_t);
404 void moea64_dumpsys_map(vm_paddr_t pa, size_t sz,
405 void **va);
406 void moea64_scan_init(void);
407 void *moea64_quick_enter_page(vm_page_t m);
408 void *moea64_quick_enter_page_dmap(vm_page_t m);
409 void moea64_quick_remove_page(void *addr);
410 bool moea64_page_is_mapped(vm_page_t m);
411 static int moea64_map_user_ptr(pmap_t pm,
412 volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen);
413 static int moea64_decode_kernel_ptr(vm_offset_t addr,
414 int *is_user, vm_offset_t *decoded_addr);
415 static size_t moea64_scan_pmap(struct bitset *dump_bitset);
416 static void *moea64_dump_pmap_init(unsigned blkpgs);
417 #ifdef __powerpc64__
418 static void moea64_page_array_startup(long);
419 #endif
420 static int moea64_mincore(pmap_t, vm_offset_t, vm_paddr_t *);
421
/*
 * Method table binding the pmap interface slots to the moea64
 * implementations declared above.  It is registered under the name
 * "mmu_oea64_base" by the MMU_DEF() that follows this table.
 */
static struct pmap_funcs moea64_methods = {
	.clear_modify = moea64_clear_modify,
	.copy_page = moea64_copy_page,
	.copy_pages = moea64_copy_pages,
	.enter = moea64_enter,
	.enter_object = moea64_enter_object,
	.enter_quick = moea64_enter_quick,
	.extract = moea64_extract,
	.extract_and_hold = moea64_extract_and_hold,
	.init = moea64_init,
	.is_modified = moea64_is_modified,
	.is_prefaultable = moea64_is_prefaultable,
	.is_referenced = moea64_is_referenced,
	.ts_referenced = moea64_ts_referenced,
	.map = moea64_map,
	.mincore = moea64_mincore,
	.page_exists_quick = moea64_page_exists_quick,
	.page_init = moea64_page_init,
	.page_wired_mappings = moea64_page_wired_mappings,
	.pinit = moea64_pinit,
	.pinit0 = moea64_pinit0,
	.protect = moea64_protect,
	.qenter = moea64_qenter,
	.qremove = moea64_qremove,
	.release = moea64_release,
	.remove = moea64_remove,
	.remove_pages = moea64_remove_pages,
	.remove_all = moea64_remove_all,
	.remove_write = moea64_remove_write,
	.sync_icache = moea64_sync_icache,
	.unwire = moea64_unwire,
	.zero_page = moea64_zero_page,
	.zero_page_area = moea64_zero_page_area,
	.activate = moea64_activate,
	.deactivate = moea64_deactivate,
	.page_set_memattr = moea64_page_set_memattr,
	.quick_enter_page = moea64_quick_enter_page,
	.quick_remove_page = moea64_quick_remove_page,
	.page_is_mapped = moea64_page_is_mapped,
#ifdef __powerpc64__
	/* Only declared and wired up on 64-bit builds. */
	.page_array_startup = moea64_page_array_startup,
#endif
	.ps_enabled = moea64_ps_enabled,
	.align_superpage = moea64_align_superpage,

	/* Internal interfaces */
	.mapdev = moea64_mapdev,
	.mapdev_attr = moea64_mapdev_attr,
	.unmapdev = moea64_unmapdev,
	.kextract = moea64_kextract,
	.kenter = moea64_kenter,
	.kenter_attr = moea64_kenter_attr,
	.dev_direct_mapped = moea64_dev_direct_mapped,
	.dumpsys_pa_init = moea64_scan_init,
	.dumpsys_scan_pmap = moea64_scan_pmap,
	.dumpsys_dump_pmap_init = moea64_dump_pmap_init,
	.dumpsys_map_chunk = moea64_dumpsys_map,
	.map_user_ptr = moea64_map_user_ptr,
	.decode_kernel_ptr = moea64_decode_kernel_ptr,
};
482
483 MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods);
484
485 /*
486 * Get physical address from PVO.
487 *
488 * For superpages, the lower bits are not stored on pvo_pte.pa and must be
489 * obtained from VA.
490 */
491 static __always_inline vm_paddr_t
moea64_pvo_paddr(struct pvo_entry * pvo)492 moea64_pvo_paddr(struct pvo_entry *pvo)
493 {
494 vm_paddr_t pa;
495
496 pa = (pvo)->pvo_pte.pa & LPTE_RPGN;
497
498 if (PVO_IS_SP(pvo)) {
499 pa &= ~HPT_SP_MASK; /* This is needed to clear LPTE_LP bits. */
500 pa |= PVO_VADDR(pvo) & HPT_SP_MASK;
501 }
502 return (pa);
503 }
504
505 static struct pvo_head *
vm_page_to_pvoh(vm_page_t m)506 vm_page_to_pvoh(vm_page_t m)
507 {
508
509 rw_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), RA_LOCKED);
510 return (&m->md.mdpg_pvoh);
511 }
512
513 static struct pvo_entry *
alloc_pvo_entry(int bootstrap)514 alloc_pvo_entry(int bootstrap)
515 {
516 struct pvo_entry *pvo;
517
518 if (!moea64_initialized || bootstrap) {
519 if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) {
520 panic("%s: bpvo pool exhausted, index=%d, size=%d, bytes=%zd."
521 "Try setting machdep.moea64_bpvo_pool_size tunable",
522 __func__, moea64_bpvo_pool_index,
523 moea64_bpvo_pool_size,
524 moea64_bpvo_pool_size * sizeof(struct pvo_entry));
525 }
526 pvo = &moea64_bpvo_pool[
527 atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)];
528 bzero(pvo, sizeof(*pvo));
529 pvo->pvo_vaddr = PVO_BOOTSTRAP;
530 } else
531 pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT | M_ZERO);
532
533 return (pvo);
534 }
535
536 static void
init_pvo_entry(struct pvo_entry * pvo,pmap_t pmap,vm_offset_t va)537 init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va)
538 {
539 uint64_t vsid;
540 uint64_t hash;
541 int shift;
542
543 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
544
545 pvo->pvo_pmap = pmap;
546 va &= ~ADDR_POFF;
547 pvo->pvo_vaddr |= va;
548 vsid = va_to_vsid(pmap, va);
549 pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
550 | (vsid << 16);
551
552 if (pmap == kernel_pmap && (pvo->pvo_vaddr & PVO_LARGE) != 0)
553 shift = moea64_large_page_shift;
554 else
555 shift = ADDR_PIDX_SHFT;
556 hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift);
557 pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3;
558 }
559
560 static void
free_pvo_entry(struct pvo_entry * pvo)561 free_pvo_entry(struct pvo_entry *pvo)
562 {
563
564 if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
565 uma_zfree(moea64_pvo_zone, pvo);
566 }
567
568 void
moea64_pte_from_pvo(const struct pvo_entry * pvo,struct lpte * lpte)569 moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte)
570 {
571
572 lpte->pte_hi = moea64_pte_vpn_from_pvo_vpn(pvo);
573 lpte->pte_hi |= LPTE_VALID;
574
575 if (pvo->pvo_vaddr & PVO_LARGE)
576 lpte->pte_hi |= LPTE_BIG;
577 if (pvo->pvo_vaddr & PVO_WIRED)
578 lpte->pte_hi |= LPTE_WIRED;
579 if (pvo->pvo_vaddr & PVO_HID)
580 lpte->pte_hi |= LPTE_HID;
581
582 lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */
583 if (pvo->pvo_pte.prot & VM_PROT_WRITE)
584 lpte->pte_lo |= LPTE_BW;
585 else
586 lpte->pte_lo |= LPTE_BR;
587
588 if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE))
589 lpte->pte_lo |= LPTE_NOEXEC;
590 }
591
592 static __inline uint64_t
moea64_calc_wimg(vm_paddr_t pa,vm_memattr_t ma)593 moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
594 {
595 uint64_t pte_lo;
596 int i;
597
598 if (ma != VM_MEMATTR_DEFAULT) {
599 switch (ma) {
600 case VM_MEMATTR_UNCACHEABLE:
601 return (LPTE_I | LPTE_G);
602 case VM_MEMATTR_CACHEABLE:
603 return (LPTE_M);
604 case VM_MEMATTR_WRITE_COMBINING:
605 case VM_MEMATTR_WRITE_BACK:
606 case VM_MEMATTR_PREFETCHABLE:
607 return (LPTE_I);
608 case VM_MEMATTR_WRITE_THROUGH:
609 return (LPTE_W | LPTE_M);
610 }
611 }
612
613 /*
614 * Assume the page is cache inhibited and access is guarded unless
615 * it's in our available memory array.
616 */
617 pte_lo = LPTE_I | LPTE_G;
618 for (i = 0; i < pregions_sz; i++) {
619 if ((pa >= pregions[i].mr_start) &&
620 (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
621 pte_lo &= ~(LPTE_I | LPTE_G);
622 pte_lo |= LPTE_M;
623 break;
624 }
625 }
626
627 return pte_lo;
628 }
629
630 /*
631 * Quick sort callout for comparing memory regions.
632 */
633 static int om_cmp(const void *a, const void *b);
634
635 static int
om_cmp(const void * a,const void * b)636 om_cmp(const void *a, const void *b)
637 {
638 const struct ofw_map *mapa;
639 const struct ofw_map *mapb;
640
641 mapa = a;
642 mapb = b;
643 if (mapa->om_pa < mapb->om_pa)
644 return (-1);
645 else if (mapa->om_pa > mapb->om_pa)
646 return (1);
647 else
648 return (0);
649 }
650
651 static void
moea64_add_ofw_mappings(phandle_t mmu,size_t sz)652 moea64_add_ofw_mappings(phandle_t mmu, size_t sz)
653 {
654 struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */
655 pcell_t acells, trans_cells[sz/sizeof(cell_t)];
656 struct pvo_entry *pvo;
657 register_t msr;
658 vm_offset_t off;
659 vm_paddr_t pa_base;
660 int i, j;
661
662 bzero(translations, sz);
663 OF_getencprop(OF_finddevice("/"), "#address-cells", &acells,
664 sizeof(acells));
665 if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1)
666 panic("moea64_bootstrap: can't get ofw translations");
667
668 CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations");
669 sz /= sizeof(cell_t);
670 for (i = 0, j = 0; i < sz; j++) {
671 translations[j].om_va = trans_cells[i++];
672 translations[j].om_len = trans_cells[i++];
673 translations[j].om_pa = trans_cells[i++];
674 if (acells == 2) {
675 translations[j].om_pa <<= 32;
676 translations[j].om_pa |= trans_cells[i++];
677 }
678 translations[j].om_mode = trans_cells[i++];
679 }
680 KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)",
681 i, sz));
682
683 sz = j;
684 qsort(translations, sz, sizeof (*translations), om_cmp);
685
686 for (i = 0; i < sz; i++) {
687 pa_base = translations[i].om_pa;
688 #ifndef __powerpc64__
689 if ((translations[i].om_pa >> 32) != 0)
690 panic("OFW translations above 32-bit boundary!");
691 #endif
692
693 if (pa_base % PAGE_SIZE)
694 panic("OFW translation not page-aligned (phys)!");
695 if (translations[i].om_va % PAGE_SIZE)
696 panic("OFW translation not page-aligned (virt)!");
697
698 CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x",
699 pa_base, translations[i].om_va, translations[i].om_len);
700
701 /* Now enter the pages for this mapping */
702
703 DISABLE_TRANS(msr);
704 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) {
705 /* If this address is direct-mapped, skip remapping */
706 if (hw_direct_map &&
707 translations[i].om_va == PHYS_TO_DMAP_ADDR(pa_base) &&
708 moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT)
709 == LPTE_M)
710 continue;
711
712 PMAP_LOCK(kernel_pmap);
713 pvo = moea64_pvo_find_va(kernel_pmap,
714 translations[i].om_va + off);
715 PMAP_UNLOCK(kernel_pmap);
716 if (pvo != NULL)
717 continue;
718
719 moea64_kenter(translations[i].om_va + off,
720 pa_base + off);
721 }
722 ENABLE_TRANS(msr);
723 }
724 }
725
726 #ifdef __powerpc64__
727 static void
moea64_probe_large_page(void)728 moea64_probe_large_page(void)
729 {
730 uint16_t pvr = mfpvr() >> 16;
731
732 switch (pvr) {
733 case IBM970:
734 case IBM970FX:
735 case IBM970MP:
736 powerpc_sync(); isync();
737 mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG);
738 powerpc_sync(); isync();
739
740 /* FALLTHROUGH */
741 default:
742 if (moea64_large_page_size == 0) {
743 moea64_large_page_size = 0x1000000; /* 16 MB */
744 moea64_large_page_shift = 24;
745 }
746 }
747
748 moea64_large_page_mask = moea64_large_page_size - 1;
749 }
750
751 static void
moea64_bootstrap_slb_prefault(vm_offset_t va,int large)752 moea64_bootstrap_slb_prefault(vm_offset_t va, int large)
753 {
754 struct slb *cache;
755 struct slb entry;
756 uint64_t esid, slbe;
757 uint64_t i;
758
759 cache = PCPU_GET(aim.slb);
760 esid = va >> ADDR_SR_SHFT;
761 slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
762
763 for (i = 0; i < 64; i++) {
764 if (cache[i].slbe == (slbe | i))
765 return;
766 }
767
768 entry.slbe = slbe;
769 entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT;
770 if (large)
771 entry.slbv |= SLBV_L;
772
773 slb_insert_kernel(entry.slbe, entry.slbv);
774 }
775 #endif
776
777 static int
moea64_kenter_large(vm_offset_t va,vm_paddr_t pa,uint64_t attr,int bootstrap)778 moea64_kenter_large(vm_offset_t va, vm_paddr_t pa, uint64_t attr, int bootstrap)
779 {
780 struct pvo_entry *pvo;
781 uint64_t pte_lo;
782 int error;
783
784 pte_lo = LPTE_M;
785 pte_lo |= attr;
786
787 pvo = alloc_pvo_entry(bootstrap);
788 pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE;
789 init_pvo_entry(pvo, kernel_pmap, va);
790
791 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE |
792 VM_PROT_EXECUTE;
793 pvo->pvo_pte.pa = pa | pte_lo;
794 error = moea64_pvo_enter(pvo, NULL, NULL);
795 if (error != 0)
796 panic("Error %d inserting large page\n", error);
797 return (0);
798 }
799
800 static void
moea64_setup_direct_map(vm_offset_t kernelstart,vm_offset_t kernelend)801 moea64_setup_direct_map(vm_offset_t kernelstart,
802 vm_offset_t kernelend)
803 {
804 register_t msr;
805 vm_paddr_t pa, pkernelstart, pkernelend;
806 vm_offset_t size, off;
807 uint64_t pte_lo;
808 int i;
809
810 if (moea64_large_page_size == 0)
811 hw_direct_map = 0;
812
813 DISABLE_TRANS(msr);
814 if (hw_direct_map) {
815 PMAP_LOCK(kernel_pmap);
816 for (i = 0; i < pregions_sz; i++) {
817 for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
818 pregions[i].mr_size; pa += moea64_large_page_size) {
819 pte_lo = LPTE_M;
820 if (pa & moea64_large_page_mask) {
821 pa &= moea64_large_page_mask;
822 pte_lo |= LPTE_G;
823 }
824 if (pa + moea64_large_page_size >
825 pregions[i].mr_start + pregions[i].mr_size)
826 pte_lo |= LPTE_G;
827
828 moea64_kenter_large(PHYS_TO_DMAP_ADDR(pa), pa, pte_lo, 1);
829 }
830 }
831 PMAP_UNLOCK(kernel_pmap);
832 }
833
834 /*
835 * Make sure the kernel and BPVO pool stay mapped on systems either
836 * without a direct map or on which the kernel is not already executing
837 * out of the direct-mapped region.
838 */
839 if (kernelstart < DMAP_BASE_ADDRESS) {
840 /*
841 * For pre-dmap execution, we need to use identity mapping
842 * because we will be operating with the mmu on but in the
843 * wrong address configuration until we __restartkernel().
844 */
845 for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
846 pa += PAGE_SIZE)
847 moea64_kenter(pa, pa);
848 } else if (!hw_direct_map) {
849 pkernelstart = kernelstart & ~DMAP_BASE_ADDRESS;
850 pkernelend = kernelend & ~DMAP_BASE_ADDRESS;
851 for (pa = pkernelstart & ~PAGE_MASK; pa < pkernelend;
852 pa += PAGE_SIZE)
853 moea64_kenter(pa | DMAP_BASE_ADDRESS, pa);
854 }
855
856 if (!hw_direct_map) {
857 size = moea64_bpvo_pool_size*sizeof(struct pvo_entry);
858 off = (vm_offset_t)(moea64_bpvo_pool);
859 for (pa = off; pa < off + size; pa += PAGE_SIZE)
860 moea64_kenter(pa, pa);
861
862 /* Map exception vectors */
863 for (pa = EXC_RSVD; pa < EXC_LAST; pa += PAGE_SIZE)
864 moea64_kenter(pa | DMAP_BASE_ADDRESS, pa);
865 }
866 ENABLE_TRANS(msr);
867
868 /*
869 * Allow user to override unmapped_buf_allowed for testing.
870 * XXXKIB Only direct map implementation was tested.
871 */
872 if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed",
873 &unmapped_buf_allowed))
874 unmapped_buf_allowed = hw_direct_map;
875 }
876
877 /* Quick sort callout for comparing physical addresses. */
878 static int
pa_cmp(const void * a,const void * b)879 pa_cmp(const void *a, const void *b)
880 {
881 const vm_paddr_t *pa = a, *pb = b;
882
883 if (*pa < *pb)
884 return (-1);
885 else if (*pa > *pb)
886 return (1);
887 else
888 return (0);
889 }
890
891 void
moea64_early_bootstrap(vm_offset_t kernelstart,vm_offset_t kernelend)892 moea64_early_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
893 {
894 int i, j;
895 vm_size_t physsz, hwphyssz;
896 vm_paddr_t kernelphysstart, kernelphysend;
897 int rm_pavail;
898
899 /* Level 0 reservations consist of 4096 pages (16MB superpage). */
900 vm_level_0_order = VM_LEVEL_0_ORDER_HPT;
901
902 #ifndef __powerpc64__
903 /* We don't have a direct map since there is no BAT */
904 hw_direct_map = 0;
905
906 /* Make sure battable is zero, since we have no BAT */
907 for (i = 0; i < 16; i++) {
908 battable[i].batu = 0;
909 battable[i].batl = 0;
910 }
911 #else
912 /* Install trap handlers for SLBs */
913 bcopy(&slbtrap, (void *)EXC_DSE,(size_t)&slbtrapend - (size_t)&slbtrap);
914 bcopy(&slbtrap, (void *)EXC_ISE,(size_t)&slbtrapend - (size_t)&slbtrap);
915 __syncicache((void *)EXC_DSE, 0x80);
916 __syncicache((void *)EXC_ISE, 0x80);
917 #endif
918
919 kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
920 kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
921
922 /* Get physical memory regions from firmware */
923 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz);
924 CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
925
926 if (PHYS_AVAIL_ENTRIES < regions_sz)
927 panic("moea64_bootstrap: phys_avail too small");
928
929 phys_avail_count = 0;
930 physsz = 0;
931 hwphyssz = 0;
932 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
933 for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
934 CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)",
935 regions[i].mr_start, regions[i].mr_start +
936 regions[i].mr_size, regions[i].mr_size);
937 if (hwphyssz != 0 &&
938 (physsz + regions[i].mr_size) >= hwphyssz) {
939 if (physsz < hwphyssz) {
940 phys_avail[j] = regions[i].mr_start;
941 phys_avail[j + 1] = regions[i].mr_start +
942 hwphyssz - physsz;
943 physsz = hwphyssz;
944 phys_avail_count++;
945 dump_avail[j] = phys_avail[j];
946 dump_avail[j + 1] = phys_avail[j + 1];
947 }
948 break;
949 }
950 phys_avail[j] = regions[i].mr_start;
951 phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
952 phys_avail_count++;
953 physsz += regions[i].mr_size;
954 dump_avail[j] = phys_avail[j];
955 dump_avail[j + 1] = phys_avail[j + 1];
956 }
957
958 /* Check for overlap with the kernel and exception vectors */
959 rm_pavail = 0;
960 for (j = 0; j < 2*phys_avail_count; j+=2) {
961 if (phys_avail[j] < EXC_LAST)
962 phys_avail[j] += EXC_LAST;
963
964 if (phys_avail[j] >= kernelphysstart &&
965 phys_avail[j+1] <= kernelphysend) {
966 phys_avail[j] = phys_avail[j+1] = ~0;
967 rm_pavail++;
968 continue;
969 }
970
971 if (kernelphysstart >= phys_avail[j] &&
972 kernelphysstart < phys_avail[j+1]) {
973 if (kernelphysend < phys_avail[j+1]) {
974 phys_avail[2*phys_avail_count] =
975 (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
976 phys_avail[2*phys_avail_count + 1] =
977 phys_avail[j+1];
978 phys_avail_count++;
979 }
980
981 phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
982 }
983
984 if (kernelphysend >= phys_avail[j] &&
985 kernelphysend < phys_avail[j+1]) {
986 if (kernelphysstart > phys_avail[j]) {
987 phys_avail[2*phys_avail_count] = phys_avail[j];
988 phys_avail[2*phys_avail_count + 1] =
989 kernelphysstart & ~PAGE_MASK;
990 phys_avail_count++;
991 }
992
993 phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
994 PAGE_SIZE;
995 }
996 }
997
998 /* Remove physical available regions marked for removal (~0) */
999 if (rm_pavail) {
1000 qsort(phys_avail, 2*phys_avail_count, sizeof(phys_avail[0]),
1001 pa_cmp);
1002 phys_avail_count -= rm_pavail;
1003 for (i = 2*phys_avail_count;
1004 i < 2*(phys_avail_count + rm_pavail); i+=2)
1005 phys_avail[i] = phys_avail[i+1] = 0;
1006 }
1007
1008 physmem = btoc(physsz);
1009
1010 #ifdef PTEGCOUNT
1011 moea64_pteg_count = PTEGCOUNT;
1012 #else
1013 moea64_pteg_count = 0x1000;
1014
1015 while (moea64_pteg_count < physmem)
1016 moea64_pteg_count <<= 1;
1017
1018 moea64_pteg_count >>= 1;
1019 #endif /* PTEGCOUNT */
1020 }
1021
1022 void
moea64_mid_bootstrap(vm_offset_t kernelstart,vm_offset_t kernelend)1023 moea64_mid_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
1024 {
1025 vm_paddr_t pa;
1026 int i;
1027
1028 /*
1029 * Set PTEG mask
1030 */
1031 moea64_pteg_mask = moea64_pteg_count - 1;
1032
1033 /*
1034 * Initialize SLB table lock and page locks
1035 */
1036 mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF);
1037 for (i = 0; i < PV_LOCK_COUNT; i++)
1038 rw_init(&pv_lock[i], "pv lock");
1039
1040 /*
1041 * Initialise the bootstrap pvo pool.
1042 */
1043 TUNABLE_INT_FETCH("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size);
1044 if (moea64_bpvo_pool_size == 0) {
1045 if (!hw_direct_map)
1046 moea64_bpvo_pool_size = ((ptoa((uintmax_t)physmem) * sizeof(struct vm_page)) /
1047 (PAGE_SIZE * PAGE_SIZE)) * BPVO_POOL_EXPANSION_FACTOR;
1048 else
1049 moea64_bpvo_pool_size = BPVO_POOL_SIZE;
1050 }
1051
1052 if (boothowto & RB_VERBOSE) {
1053 printf("mmu_oea64: bpvo pool entries = %d, bpvo pool size = %zu MB\n",
1054 moea64_bpvo_pool_size,
1055 moea64_bpvo_pool_size*sizeof(struct pvo_entry) / 1048576);
1056 }
1057
1058 pa = moea64_bootstrap_alloc(
1059 moea64_bpvo_pool_size * sizeof(struct pvo_entry), PAGE_SIZE);
1060 moea64_bpvo_pool_index = 0;
1061
1062 /* Place at address usable through the direct map */
1063 if (hw_direct_map)
1064 moea64_bpvo_pool = PHYS_TO_DMAP(pa);
1065 else
1066 moea64_bpvo_pool = (struct pvo_entry *)pa;
1067
1068 /*
1069 * Make sure kernel vsid is allocated as well as VSID 0.
1070 */
1071 #ifndef __powerpc64__
1072 moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW]
1073 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
1074 moea64_vsid_bitmap[0] |= 1;
1075 #endif
1076
1077 /*
1078 * Initialize the kernel pmap (which is statically allocated).
1079 */
1080 #ifdef __powerpc64__
1081 for (i = 0; i < 64; i++) {
1082 pcpup->pc_aim.slb[i].slbv = 0;
1083 pcpup->pc_aim.slb[i].slbe = 0;
1084 }
1085 #else
1086 for (i = 0; i < 16; i++)
1087 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
1088 #endif
1089
1090 kernel_pmap->pmap_phys = kernel_pmap;
1091 CPU_FILL(&kernel_pmap->pm_active);
1092 RB_INIT(&kernel_pmap->pmap_pvo);
1093
1094 mtx_init(&kernel_pmap->pm_mtx, "kernel pmap", NULL, MTX_DEF);
1095
1096 /*
1097 * Now map in all the other buffers we allocated earlier
1098 */
1099
1100 moea64_setup_direct_map(kernelstart, kernelend);
1101 }
1102
1103 void
moea64_late_bootstrap(vm_offset_t kernelstart,vm_offset_t kernelend)1104 moea64_late_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
1105 {
1106 ihandle_t mmui;
1107 phandle_t chosen;
1108 phandle_t mmu;
1109 ssize_t sz;
1110 int i;
1111 vm_paddr_t pa;
1112 vm_offset_t va;
1113 void *dpcpu;
1114
1115 /*
1116 * Set up the Open Firmware pmap and add its mappings if not in real
1117 * mode.
1118 */
1119
1120 chosen = OF_finddevice("/chosen");
1121 if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) {
1122 mmu = OF_instance_to_package(mmui);
1123 if (mmu == -1 ||
1124 (sz = OF_getproplen(mmu, "translations")) == -1)
1125 sz = 0;
1126 if (sz > 6144 /* tmpstksz - 2 KB headroom */)
1127 panic("moea64_bootstrap: too many ofw translations");
1128
1129 if (sz > 0)
1130 moea64_add_ofw_mappings(mmu, sz);
1131 }
1132
1133 /*
1134 * Calculate the last available physical address.
1135 */
1136 Maxmem = 0;
1137 for (i = 0; phys_avail[i + 1] != 0; i += 2)
1138 Maxmem = MAX(Maxmem, powerpc_btop(phys_avail[i + 1]));
1139
1140 /*
1141 * Initialize MMU.
1142 */
1143 pmap_cpu_bootstrap(0);
1144 mtmsr(mfmsr() | PSL_DR | PSL_IR);
1145 pmap_bootstrapped++;
1146
1147 /*
1148 * Set the start and end of kva.
1149 */
1150 virtual_avail = VM_MIN_KERNEL_ADDRESS;
1151 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS;
1152
1153 /*
1154 * Map the entire KVA range into the SLB. We must not fault there.
1155 */
1156 #ifdef __powerpc64__
1157 for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
1158 moea64_bootstrap_slb_prefault(va, 0);
1159 #endif
1160
1161 /*
1162 * Remap any early IO mappings (console framebuffer, etc.)
1163 */
1164 bs_remap_earlyboot();
1165
1166 /*
1167 * Figure out how far we can extend virtual_end into segment 16
1168 * without running into existing mappings. Segment 16 is guaranteed
1169 * to contain neither RAM nor devices (at least on Apple hardware),
1170 * but will generally contain some OFW mappings we should not
1171 * step on.
1172 */
1173
1174 #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */
1175 PMAP_LOCK(kernel_pmap);
1176 while (virtual_end < VM_MAX_KERNEL_ADDRESS &&
1177 moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL)
1178 virtual_end += PAGE_SIZE;
1179 PMAP_UNLOCK(kernel_pmap);
1180 #endif
1181
1182 /*
1183 * Allocate a kernel stack with a guard page for thread0 and map it
1184 * into the kernel page map.
1185 */
1186 pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE);
1187 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
1188 virtual_avail = va + kstack_pages * PAGE_SIZE;
1189 CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va);
1190 thread0.td_kstack = (char *)va;
1191 thread0.td_kstack_pages = kstack_pages;
1192 for (i = 0; i < kstack_pages; i++) {
1193 moea64_kenter(va, pa);
1194 pa += PAGE_SIZE;
1195 va += PAGE_SIZE;
1196 }
1197
1198 /*
1199 * Allocate virtual address space for the message buffer.
1200 */
1201 pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE);
1202 msgbufp = (struct msgbuf *)virtual_avail;
1203 va = virtual_avail;
1204 virtual_avail += round_page(msgbufsize);
1205 while (va < virtual_avail) {
1206 moea64_kenter(va, pa);
1207 pa += PAGE_SIZE;
1208 va += PAGE_SIZE;
1209 }
1210
1211 /*
1212 * Allocate virtual address space for the dynamic percpu area.
1213 */
1214 pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
1215 dpcpu = (void *)virtual_avail;
1216 va = virtual_avail;
1217 virtual_avail += DPCPU_SIZE;
1218 while (va < virtual_avail) {
1219 moea64_kenter(va, pa);
1220 pa += PAGE_SIZE;
1221 va += PAGE_SIZE;
1222 }
1223 dpcpu_init(dpcpu, curcpu);
1224
1225 crashdumpmap = (caddr_t)virtual_avail;
1226 virtual_avail += MAXDUMPPGS * PAGE_SIZE;
1227
1228 /*
1229 * Allocate some things for page zeroing. We put this directly
1230 * in the page table and use MOEA64_PTE_REPLACE to avoid any
1231 * of the PVO book-keeping or other parts of the VM system
1232 * from even knowing that this hack exists.
1233 */
1234
1235 if (!hw_direct_map) {
1236 mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL,
1237 MTX_DEF);
1238 for (i = 0; i < 2; i++) {
1239 moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE;
1240 virtual_end -= PAGE_SIZE;
1241
1242 moea64_kenter(moea64_scratchpage_va[i], 0);
1243
1244 PMAP_LOCK(kernel_pmap);
1245 moea64_scratchpage_pvo[i] = moea64_pvo_find_va(
1246 kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]);
1247 PMAP_UNLOCK(kernel_pmap);
1248 }
1249 }
1250
1251 numa_mem_regions(&numa_pregions, &numapregions_sz);
1252 }
1253
1254 static void
moea64_pmap_init_qpages(void * dummy __unused)1255 moea64_pmap_init_qpages(void *dummy __unused)
1256 {
1257 struct pcpu *pc;
1258 int i;
1259
1260 if (hw_direct_map)
1261 return;
1262
1263 CPU_FOREACH(i) {
1264 pc = pcpu_find(i);
1265 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
1266 if (pc->pc_qmap_addr == NULL)
1267 panic("pmap_init_qpages: unable to allocate KVA");
1268 PMAP_LOCK(kernel_pmap);
1269 pc->pc_aim.qmap_pvo = moea64_pvo_find_va(kernel_pmap,
1270 (vm_offset_t)pc->pc_qmap_addr);
1271 PMAP_UNLOCK(kernel_pmap);
1272 mtx_init(&pc->pc_aim.qmap_lock, "qmap lock", NULL, MTX_DEF);
1273 }
1274 }
1275
1276 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL);
1277
1278 /*
1279 * Activate a user pmap. This mostly involves setting some non-CPU
1280 * state.
1281 */
1282 void
moea64_activate(struct thread * td)1283 moea64_activate(struct thread *td)
1284 {
1285 pmap_t pm;
1286
1287 pm = &td->td_proc->p_vmspace->vm_pmap;
1288 CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
1289
1290 #ifdef __powerpc64__
1291 PCPU_SET(aim.userslb, pm->pm_slb);
1292 __asm __volatile("slbmte %0, %1; isync" ::
1293 "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
1294 #else
1295 PCPU_SET(curpmap, pm->pmap_phys);
1296 mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid);
1297 #endif
1298 }
1299
1300 void
moea64_deactivate(struct thread * td)1301 moea64_deactivate(struct thread *td)
1302 {
1303 pmap_t pm;
1304
1305 __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR));
1306
1307 pm = &td->td_proc->p_vmspace->vm_pmap;
1308 CPU_CLR(PCPU_GET(cpuid), &pm->pm_active);
1309 #ifdef __powerpc64__
1310 PCPU_SET(aim.userslb, NULL);
1311 #else
1312 PCPU_SET(curpmap, NULL);
1313 #endif
1314 }
1315
1316 void
moea64_unwire(pmap_t pm,vm_offset_t sva,vm_offset_t eva)1317 moea64_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1318 {
1319 struct pvo_entry key, *pvo;
1320 vm_page_t m;
1321 int64_t refchg;
1322
1323 key.pvo_vaddr = sva;
1324 PMAP_LOCK(pm);
1325 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
1326 pvo != NULL && PVO_VADDR(pvo) < eva;
1327 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
1328 if (PVO_IS_SP(pvo)) {
1329 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
1330 pvo = moea64_sp_unwire(pvo);
1331 continue;
1332 } else {
1333 CTR1(KTR_PMAP, "%s: demote before unwire",
1334 __func__);
1335 moea64_sp_demote(pvo);
1336 }
1337 }
1338
1339 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
1340 panic("moea64_unwire: pvo %p is missing PVO_WIRED",
1341 pvo);
1342 pvo->pvo_vaddr &= ~PVO_WIRED;
1343 refchg = moea64_pte_replace(pvo, 0 /* No invalidation */);
1344 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
1345 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
1346 if (refchg < 0)
1347 refchg = LPTE_CHG;
1348 m = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
1349
1350 refchg |= atomic_readandclear_32(&m->md.mdpg_attrs);
1351 if (refchg & LPTE_CHG)
1352 vm_page_dirty(m);
1353 if (refchg & LPTE_REF)
1354 vm_page_aflag_set(m, PGA_REFERENCED);
1355 }
1356 pm->pm_stats.wired_count--;
1357 }
1358 PMAP_UNLOCK(pm);
1359 }
1360
1361 static int
moea64_mincore(pmap_t pmap,vm_offset_t addr,vm_paddr_t * pap)1362 moea64_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
1363 {
1364 struct pvo_entry *pvo;
1365 vm_paddr_t pa;
1366 vm_page_t m;
1367 int val;
1368 bool managed;
1369
1370 PMAP_LOCK(pmap);
1371
1372 pvo = moea64_pvo_find_va(pmap, addr);
1373 if (pvo != NULL) {
1374 pa = PVO_PADDR(pvo);
1375 m = PHYS_TO_VM_PAGE(pa);
1376 managed = (pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED;
1377 if (PVO_IS_SP(pvo))
1378 val = MINCORE_INCORE | MINCORE_PSIND(1);
1379 else
1380 val = MINCORE_INCORE;
1381 } else {
1382 PMAP_UNLOCK(pmap);
1383 return (0);
1384 }
1385
1386 PMAP_UNLOCK(pmap);
1387
1388 if (m == NULL)
1389 return (0);
1390
1391 if (managed) {
1392 if (moea64_is_modified(m))
1393 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
1394
1395 if (moea64_is_referenced(m))
1396 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
1397 }
1398
1399 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
1400 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
1401 managed) {
1402 *pap = pa;
1403 }
1404
1405 return (val);
1406 }
1407
1408 /*
1409 * This goes through and sets the physical address of our
1410 * special scratch PTE to the PA we want to zero or copy. Because
1411 * of locking issues (this can get called in pvo_enter() by
1412 * the UMA allocator), we can't use most other utility functions here
1413 */
1414
1415 static __inline
moea64_set_scratchpage_pa(int which,vm_paddr_t pa)1416 void moea64_set_scratchpage_pa(int which, vm_paddr_t pa)
1417 {
1418 struct pvo_entry *pvo;
1419
1420 KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!"));
1421 mtx_assert(&moea64_scratchpage_mtx, MA_OWNED);
1422
1423 pvo = moea64_scratchpage_pvo[which];
1424 PMAP_LOCK(pvo->pvo_pmap);
1425 pvo->pvo_pte.pa =
1426 moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa;
1427 moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
1428 PMAP_UNLOCK(pvo->pvo_pmap);
1429 isync();
1430 }
1431
1432 void
moea64_copy_page(vm_page_t msrc,vm_page_t mdst)1433 moea64_copy_page(vm_page_t msrc, vm_page_t mdst)
1434 {
1435 mtx_lock(&moea64_scratchpage_mtx);
1436
1437 moea64_set_scratchpage_pa(0, VM_PAGE_TO_PHYS(msrc));
1438 moea64_set_scratchpage_pa(1, VM_PAGE_TO_PHYS(mdst));
1439
1440 bcopy((void *)moea64_scratchpage_va[0],
1441 (void *)moea64_scratchpage_va[1], PAGE_SIZE);
1442
1443 mtx_unlock(&moea64_scratchpage_mtx);
1444 }
1445
1446 void
moea64_copy_page_dmap(vm_page_t msrc,vm_page_t mdst)1447 moea64_copy_page_dmap(vm_page_t msrc, vm_page_t mdst)
1448 {
1449 bcopy(VM_PAGE_TO_DMAP(msrc), VM_PAGE_TO_DMAP(mdst), PAGE_SIZE);
1450 }
1451
1452 inline void
moea64_copy_pages_dmap(vm_page_t * ma,vm_offset_t a_offset,vm_page_t * mb,vm_offset_t b_offset,int xfersize)1453 moea64_copy_pages_dmap(vm_page_t *ma, vm_offset_t a_offset,
1454 vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1455 {
1456 void *a_cp, *b_cp;
1457 vm_offset_t a_pg_offset, b_pg_offset;
1458 int cnt;
1459
1460 while (xfersize > 0) {
1461 a_pg_offset = a_offset & PAGE_MASK;
1462 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1463 a_cp = (char *)VM_PAGE_TO_DMAP(ma[a_offset >> PAGE_SHIFT]) +
1464 a_pg_offset;
1465 b_pg_offset = b_offset & PAGE_MASK;
1466 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1467 b_cp = (char *)VM_PAGE_TO_DMAP(mb[b_offset >> PAGE_SHIFT]) +
1468 b_pg_offset;
1469 bcopy(a_cp, b_cp, cnt);
1470 a_offset += cnt;
1471 b_offset += cnt;
1472 xfersize -= cnt;
1473 }
1474 }
1475
1476 void
moea64_copy_pages(vm_page_t * ma,vm_offset_t a_offset,vm_page_t * mb,vm_offset_t b_offset,int xfersize)1477 moea64_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
1478 vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1479 {
1480 void *a_cp, *b_cp;
1481 vm_offset_t a_pg_offset, b_pg_offset;
1482 int cnt;
1483
1484 mtx_lock(&moea64_scratchpage_mtx);
1485 while (xfersize > 0) {
1486 a_pg_offset = a_offset & PAGE_MASK;
1487 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1488 moea64_set_scratchpage_pa(0,
1489 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
1490 a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset;
1491 b_pg_offset = b_offset & PAGE_MASK;
1492 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1493 moea64_set_scratchpage_pa(1,
1494 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
1495 b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset;
1496 bcopy(a_cp, b_cp, cnt);
1497 a_offset += cnt;
1498 b_offset += cnt;
1499 xfersize -= cnt;
1500 }
1501 mtx_unlock(&moea64_scratchpage_mtx);
1502 }
1503
1504 void
moea64_zero_page_area(vm_page_t m,int off,int size)1505 moea64_zero_page_area(vm_page_t m, int off, int size)
1506 {
1507 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1508
1509 if (size + off > PAGE_SIZE)
1510 panic("moea64_zero_page: size + off > PAGE_SIZE");
1511
1512 if (hw_direct_map) {
1513 bzero((caddr_t)PHYS_TO_DMAP(pa) + off, size);
1514 } else {
1515 mtx_lock(&moea64_scratchpage_mtx);
1516 moea64_set_scratchpage_pa(0, pa);
1517 bzero((caddr_t)moea64_scratchpage_va[0] + off, size);
1518 mtx_unlock(&moea64_scratchpage_mtx);
1519 }
1520 }
1521
1522 /*
1523 * Zero a page of physical memory by temporarily mapping it
1524 */
1525 void
moea64_zero_page(vm_page_t m)1526 moea64_zero_page(vm_page_t m)
1527 {
1528 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1529 vm_offset_t va;
1530
1531 mtx_lock(&moea64_scratchpage_mtx);
1532
1533 moea64_set_scratchpage_pa(0, pa);
1534 va = moea64_scratchpage_va[0];
1535
1536 bzero((void *)va, PAGE_SIZE);
1537
1538 mtx_unlock(&moea64_scratchpage_mtx);
1539 }
1540
1541 void
moea64_zero_page_dmap(vm_page_t m)1542 moea64_zero_page_dmap(vm_page_t m)
1543 {
1544 bzero(VM_PAGE_TO_DMAP(m), PAGE_SIZE);
1545 }
1546
1547 void *
moea64_quick_enter_page(vm_page_t m)1548 moea64_quick_enter_page(vm_page_t m)
1549 {
1550 struct pvo_entry *pvo;
1551 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1552
1553 /*
1554 * MOEA64_PTE_REPLACE does some locking, so we can't just grab
1555 * a critical section and access the PCPU data like on i386.
1556 * Instead, pin the thread and grab the PCPU lock to prevent
1557 * a preempting thread from using the same PCPU data.
1558 */
1559 sched_pin();
1560
1561 mtx_assert(PCPU_PTR(aim.qmap_lock), MA_NOTOWNED);
1562 pvo = PCPU_GET(aim.qmap_pvo);
1563
1564 mtx_lock(PCPU_PTR(aim.qmap_lock));
1565 pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) |
1566 (uint64_t)pa;
1567 moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
1568 isync();
1569
1570 return (PCPU_GET(qmap_addr));
1571 }
1572
1573 void *
moea64_quick_enter_page_dmap(vm_page_t m)1574 moea64_quick_enter_page_dmap(vm_page_t m)
1575 {
1576
1577 return (VM_PAGE_TO_DMAP(m));
1578 }
1579
1580 void
moea64_quick_remove_page(void * addr)1581 moea64_quick_remove_page(void *addr)
1582 {
1583
1584 mtx_assert(PCPU_PTR(aim.qmap_lock), MA_OWNED);
1585 KASSERT(PCPU_GET(qmap_addr) == addr,
1586 ("moea64_quick_remove_page: invalid address"));
1587 mtx_unlock(PCPU_PTR(aim.qmap_lock));
1588 sched_unpin();
1589 }
1590
1591 bool
moea64_page_is_mapped(vm_page_t m)1592 moea64_page_is_mapped(vm_page_t m)
1593 {
1594 return (!LIST_EMPTY(&(m)->md.mdpg_pvoh));
1595 }
1596
1597 /*
1598 * Map the given physical page at the specified virtual address in the
1599 * target pmap with the protection requested. If specified the page
1600 * will be wired down.
1601 */
1602
1603 int
moea64_enter(pmap_t pmap,vm_offset_t va,vm_page_t m,vm_prot_t prot,u_int flags,int8_t psind)1604 moea64_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
1605 vm_prot_t prot, u_int flags, int8_t psind)
1606 {
1607 struct pvo_entry *pvo, *oldpvo, *tpvo;
1608 struct pvo_head *pvo_head;
1609 uint64_t pte_lo;
1610 int error;
1611 vm_paddr_t pa;
1612
1613 if ((m->oflags & VPO_UNMANAGED) == 0) {
1614 if ((flags & PMAP_ENTER_QUICK_LOCKED) == 0)
1615 VM_PAGE_OBJECT_BUSY_ASSERT(m);
1616 else
1617 VM_OBJECT_ASSERT_LOCKED(m->object);
1618 }
1619
1620 if (psind > 0)
1621 return (moea64_sp_enter(pmap, va, m, prot, flags, psind));
1622
1623 pvo = alloc_pvo_entry(0);
1624 if (pvo == NULL)
1625 return (KERN_RESOURCE_SHORTAGE);
1626 pvo->pvo_pmap = NULL; /* to be filled in later */
1627 pvo->pvo_pte.prot = prot;
1628
1629 pa = VM_PAGE_TO_PHYS(m);
1630 pte_lo = moea64_calc_wimg(pa, pmap_page_get_memattr(m));
1631 pvo->pvo_pte.pa = pa | pte_lo;
1632
1633 if ((flags & PMAP_ENTER_WIRED) != 0)
1634 pvo->pvo_vaddr |= PVO_WIRED;
1635
1636 if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) {
1637 pvo_head = NULL;
1638 } else {
1639 pvo_head = &m->md.mdpg_pvoh;
1640 pvo->pvo_vaddr |= PVO_MANAGED;
1641 }
1642
1643 PV_WR_LOCK(pa);
1644 PMAP_LOCK(pmap);
1645 if (pvo->pvo_pmap == NULL)
1646 init_pvo_entry(pvo, pmap, va);
1647
1648 if (moea64_ps_enabled(pmap) &&
1649 (tpvo = moea64_pvo_find_va(pmap, va & ~HPT_SP_MASK)) != NULL &&
1650 PVO_IS_SP(tpvo)) {
1651 /* Demote SP before entering a regular page */
1652 CTR2(KTR_PMAP, "%s: demote before enter: va=%#jx",
1653 __func__, (uintmax_t)va);
1654 moea64_sp_demote_aligned(tpvo);
1655 }
1656
1657 if (prot & VM_PROT_WRITE)
1658 if (pmap_bootstrapped &&
1659 (m->oflags & VPO_UNMANAGED) == 0)
1660 vm_page_aflag_set(m, PGA_WRITEABLE);
1661
1662 error = moea64_pvo_enter(pvo, pvo_head, &oldpvo);
1663 if (error == EEXIST) {
1664 if (oldpvo->pvo_vaddr == pvo->pvo_vaddr &&
1665 oldpvo->pvo_pte.pa == pvo->pvo_pte.pa &&
1666 oldpvo->pvo_pte.prot == prot) {
1667 /* Identical mapping already exists */
1668 error = 0;
1669
1670 /* If not in page table, reinsert it */
1671 if (moea64_pte_synch(oldpvo) < 0) {
1672 STAT_MOEA64(moea64_pte_overflow--);
1673 moea64_pte_insert(oldpvo);
1674 }
1675
1676 /* Then just clean up and go home */
1677 PMAP_UNLOCK(pmap);
1678 PV_UNLOCK(pa);
1679 free_pvo_entry(pvo);
1680 pvo = NULL;
1681 goto out;
1682 } else {
1683 /* Otherwise, need to kill it first */
1684 KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old "
1685 "mapping does not match new mapping"));
1686 moea64_pvo_remove_from_pmap(oldpvo);
1687 moea64_pvo_enter(pvo, pvo_head, NULL);
1688 }
1689 }
1690 PMAP_UNLOCK(pmap);
1691 PV_UNLOCK(pa);
1692
1693 /* Free any dead pages */
1694 if (error == EEXIST) {
1695 moea64_pvo_remove_from_page(oldpvo);
1696 free_pvo_entry(oldpvo);
1697 }
1698
1699 out:
1700 /*
1701 * Flush the page from the instruction cache if this page is
1702 * mapped executable and cacheable.
1703 */
1704 if (pmap != kernel_pmap && (m->a.flags & PGA_EXECUTABLE) == 0 &&
1705 (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
1706 vm_page_aflag_set(m, PGA_EXECUTABLE);
1707 moea64_syncicache(pmap, va, pa, PAGE_SIZE);
1708 }
1709
1710 #if VM_NRESERVLEVEL > 0
1711 /*
1712 * Try to promote pages.
1713 *
1714 * If the VA of the entered page is not aligned with its PA,
1715 * don't try page promotion as it is not possible.
1716 * This reduces the number of promotion failures dramatically.
1717 *
1718 * Ignore VM_PROT_NO_PROMOTE unless PMAP_ENTER_QUICK_LOCKED.
1719 */
1720 if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL &&
1721 (pvo->pvo_vaddr & PVO_MANAGED) != 0 &&
1722 (va & HPT_SP_MASK) == (pa & HPT_SP_MASK) &&
1723 ((prot & VM_PROT_NO_PROMOTE) == 0 ||
1724 (flags & PMAP_ENTER_QUICK_LOCKED) == 0) &&
1725 (m->flags & PG_FICTITIOUS) == 0 &&
1726 vm_reserv_level_iffullpop(m) == 0)
1727 moea64_sp_promote(pmap, va, m);
1728 #endif
1729
1730 return (KERN_SUCCESS);
1731 }
1732
1733 static void
moea64_syncicache(pmap_t pmap,vm_offset_t va,vm_paddr_t pa,vm_size_t sz)1734 moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
1735 vm_size_t sz)
1736 {
1737
1738 /*
1739 * This is much trickier than on older systems because
1740 * we can't sync the icache on physical addresses directly
1741 * without a direct map. Instead we check a couple of cases
1742 * where the memory is already mapped in and, failing that,
1743 * use the same trick we use for page zeroing to create
1744 * a temporary mapping for this physical address.
1745 */
1746
1747 if (!pmap_bootstrapped) {
1748 /*
1749 * If PMAP is not bootstrapped, we are likely to be
1750 * in real mode.
1751 */
1752 __syncicache((void *)(uintptr_t)pa, sz);
1753 } else if (pmap == kernel_pmap) {
1754 __syncicache((void *)va, sz);
1755 } else if (hw_direct_map) {
1756 __syncicache(PHYS_TO_DMAP(pa), sz);
1757 } else {
1758 /* Use the scratch page to set up a temp mapping */
1759
1760 mtx_lock(&moea64_scratchpage_mtx);
1761
1762 moea64_set_scratchpage_pa(1, pa & ~ADDR_POFF);
1763 __syncicache((void *)(moea64_scratchpage_va[1] +
1764 (va & ADDR_POFF)), sz);
1765
1766 mtx_unlock(&moea64_scratchpage_mtx);
1767 }
1768 }
1769
1770 /*
1771 * Maps a sequence of resident pages belonging to the same object.
1772 * The sequence begins with the given page m_start. This page is
1773 * mapped at the given virtual address start. Each subsequent page is
1774 * mapped at a virtual address that is offset from start by the same
1775 * amount as the page is offset from m_start within the object. The
1776 * last page in the sequence is the page with the largest offset from
1777 * m_start that can be mapped at a virtual address less than the given
1778 * virtual address end. Not every virtual page between start and end
1779 * is mapped; only those for which a resident page exists with the
1780 * corresponding offset from m_start are mapped.
1781 */
1782 void
moea64_enter_object(pmap_t pm,vm_offset_t start,vm_offset_t end,vm_page_t m_start,vm_prot_t prot)1783 moea64_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1784 vm_page_t m_start, vm_prot_t prot)
1785 {
1786 struct pctrie_iter pages;
1787 vm_page_t m;
1788 vm_offset_t va;
1789 int8_t psind;
1790
1791 VM_OBJECT_ASSERT_LOCKED(m_start->object);
1792
1793 vm_page_iter_limit_init(&pages, m_start->object,
1794 m_start->pindex + atop(end - start));
1795 m = vm_radix_iter_lookup(&pages, m_start->pindex);
1796 while (m != NULL) {
1797 va = start + ptoa(m->pindex - m_start->pindex);
1798 if ((va & HPT_SP_MASK) == 0 && va + HPT_SP_SIZE <= end &&
1799 m->psind == 1 && moea64_ps_enabled(pm))
1800 psind = 1;
1801 else
1802 psind = 0;
1803 moea64_enter(pm, va, m, prot &
1804 (VM_PROT_READ | VM_PROT_EXECUTE),
1805 PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, psind);
1806 if (psind == 1)
1807 m = vm_radix_iter_jump(&pages, HPT_SP_SIZE / PAGE_SIZE);
1808 else
1809 m = vm_radix_iter_step(&pages);
1810 }
1811 }
1812
1813 void
moea64_enter_quick(pmap_t pm,vm_offset_t va,vm_page_t m,vm_prot_t prot)1814 moea64_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m,
1815 vm_prot_t prot)
1816 {
1817
1818 moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE |
1819 VM_PROT_NO_PROMOTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED,
1820 0);
1821 }
1822
1823 vm_paddr_t
moea64_extract(pmap_t pm,vm_offset_t va)1824 moea64_extract(pmap_t pm, vm_offset_t va)
1825 {
1826 struct pvo_entry *pvo;
1827 vm_paddr_t pa;
1828
1829 PMAP_LOCK(pm);
1830 pvo = moea64_pvo_find_va(pm, va);
1831 if (pvo == NULL)
1832 pa = 0;
1833 else
1834 pa = PVO_PADDR(pvo) | (va - PVO_VADDR(pvo));
1835 PMAP_UNLOCK(pm);
1836
1837 return (pa);
1838 }
1839
1840 /*
1841 * Atomically extract and hold the physical page with the given
1842 * pmap and virtual address pair if that mapping permits the given
1843 * protection.
1844 */
1845 vm_page_t
moea64_extract_and_hold(pmap_t pmap,vm_offset_t va,vm_prot_t prot)1846 moea64_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1847 {
1848 struct pvo_entry *pvo;
1849 vm_page_t m;
1850
1851 m = NULL;
1852 PMAP_LOCK(pmap);
1853 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1854 if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) {
1855 m = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
1856 if (!vm_page_wire_mapped(m))
1857 m = NULL;
1858 }
1859 PMAP_UNLOCK(pmap);
1860 return (m);
1861 }
1862
1863 static void *
moea64_uma_page_alloc(uma_zone_t zone,vm_size_t bytes,int domain,uint8_t * flags,int wait)1864 moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
1865 uint8_t *flags, int wait)
1866 {
1867 struct pvo_entry *pvo;
1868 vm_offset_t va;
1869 vm_page_t m;
1870 int needed_lock;
1871
1872 /*
1873 * This entire routine is a horrible hack to avoid bothering kmem
1874 * for new KVA addresses. Because this can get called from inside
1875 * kmem allocation routines, calling kmem for a new address here
1876 * can lead to multiply locking non-recursive mutexes.
1877 */
1878
1879 *flags = UMA_SLAB_PRIV;
1880 needed_lock = !PMAP_LOCKED(kernel_pmap);
1881
1882 m = vm_page_alloc_noobj_domain(domain, malloc2vm_flags(wait) |
1883 VM_ALLOC_WIRED);
1884 if (m == NULL)
1885 return (NULL);
1886
1887 va = VM_PAGE_TO_PHYS(m);
1888
1889 pvo = alloc_pvo_entry(1 /* bootstrap */);
1890
1891 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE;
1892 pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M;
1893
1894 if (needed_lock)
1895 PMAP_LOCK(kernel_pmap);
1896
1897 init_pvo_entry(pvo, kernel_pmap, va);
1898 pvo->pvo_vaddr |= PVO_WIRED;
1899
1900 moea64_pvo_enter(pvo, NULL, NULL);
1901
1902 if (needed_lock)
1903 PMAP_UNLOCK(kernel_pmap);
1904
1905 return (void *)va;
1906 }
1907
1908 extern int elf32_nxstack;
1909
1910 void
moea64_init(void)1911 moea64_init(void)
1912 {
1913
1914 CTR0(KTR_PMAP, "moea64_init");
1915
1916 moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry),
1917 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1918 UMA_ZONE_VM | UMA_ZONE_NOFREE);
1919
1920 /* Are large page mappings enabled? */
1921 superpages_enabled = 1;
1922 TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
1923 if (superpages_enabled) {
1924 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
1925 ("moea64_init: can't assign to pagesizes[1]"));
1926
1927 if (moea64_large_page_size == 0) {
1928 printf("mmu_oea64: HW does not support large pages. "
1929 "Disabling superpages...\n");
1930 superpages_enabled = 0;
1931 } else if (!moea64_has_lp_4k_16m) {
1932 printf("mmu_oea64: "
1933 "HW does not support mixed 4KB/16MB page sizes. "
1934 "Disabling superpages...\n");
1935 superpages_enabled = 0;
1936 } else
1937 pagesizes[1] = HPT_SP_SIZE;
1938 }
1939
1940 if (!hw_direct_map) {
1941 uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc);
1942 }
1943
1944 #ifdef COMPAT_FREEBSD32
1945 elf32_nxstack = 1;
1946 #endif
1947
1948 moea64_initialized = true;
1949 }
1950
1951 bool
moea64_is_referenced(vm_page_t m)1952 moea64_is_referenced(vm_page_t m)
1953 {
1954
1955 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1956 ("moea64_is_referenced: page %p is not managed", m));
1957
1958 return (moea64_query_bit(m, LPTE_REF));
1959 }
1960
1961 bool
moea64_is_modified(vm_page_t m)1962 moea64_is_modified(vm_page_t m)
1963 {
1964
1965 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1966 ("moea64_is_modified: page %p is not managed", m));
1967
1968 /*
1969 * If the page is not busied then this check is racy.
1970 */
1971 if (!pmap_page_is_write_mapped(m))
1972 return (false);
1973
1974 return (moea64_query_bit(m, LPTE_CHG));
1975 }
1976
1977 bool
moea64_is_prefaultable(pmap_t pmap,vm_offset_t va)1978 moea64_is_prefaultable(pmap_t pmap, vm_offset_t va)
1979 {
1980 struct pvo_entry *pvo;
1981 bool rv = true;
1982
1983 PMAP_LOCK(pmap);
1984 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1985 if (pvo != NULL)
1986 rv = false;
1987 PMAP_UNLOCK(pmap);
1988 return (rv);
1989 }
1990
1991 void
moea64_clear_modify(vm_page_t m)1992 moea64_clear_modify(vm_page_t m)
1993 {
1994
1995 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1996 ("moea64_clear_modify: page %p is not managed", m));
1997 vm_page_assert_busied(m);
1998
1999 if (!pmap_page_is_write_mapped(m))
2000 return;
2001 moea64_clear_bit(m, LPTE_CHG);
2002 }
2003
2004 /*
2005 * Clear the write and modified bits in each of the given page's mappings.
2006 */
2007 void
moea64_remove_write(vm_page_t m)2008 moea64_remove_write(vm_page_t m)
2009 {
2010 struct pvo_entry *pvo;
2011 int64_t refchg, ret;
2012 pmap_t pmap;
2013
2014 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2015 ("moea64_remove_write: page %p is not managed", m));
2016 vm_page_assert_busied(m);
2017
2018 if (!pmap_page_is_write_mapped(m))
2019 return;
2020
2021 powerpc_sync();
2022 PV_PAGE_WR_LOCK(m);
2023 refchg = 0;
2024 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2025 pmap = pvo->pvo_pmap;
2026 PMAP_LOCK(pmap);
2027 if (!(pvo->pvo_vaddr & PVO_DEAD) &&
2028 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2029 if (PVO_IS_SP(pvo)) {
2030 CTR1(KTR_PMAP, "%s: demote before remwr",
2031 __func__);
2032 moea64_sp_demote(pvo);
2033 }
2034 pvo->pvo_pte.prot &= ~VM_PROT_WRITE;
2035 ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
2036 if (ret < 0)
2037 ret = LPTE_CHG;
2038 refchg |= ret;
2039 if (pvo->pvo_pmap == kernel_pmap)
2040 isync();
2041 }
2042 PMAP_UNLOCK(pmap);
2043 }
2044 if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG)
2045 vm_page_dirty(m);
2046 vm_page_aflag_clear(m, PGA_WRITEABLE);
2047 PV_PAGE_UNLOCK(m);
2048 }
2049
2050 /*
2051 * moea64_ts_referenced:
2052 *
2053 * Return a count of reference bits for a page, clearing those bits.
2054 * It is not necessary for every reference bit to be cleared, but it
2055 * is necessary that 0 only be returned when there are truly no
2056 * reference bits set.
2057 *
2058 * XXX: The exact number of bits to check and clear is a matter that
2059 * should be tested and standardized at some point in the future for
2060 * optimal aging of shared pages.
2061 */
2062 int
moea64_ts_referenced(vm_page_t m)2063 moea64_ts_referenced(vm_page_t m)
2064 {
2065
2066 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2067 ("moea64_ts_referenced: page %p is not managed", m));
2068 return (moea64_clear_bit(m, LPTE_REF));
2069 }
2070
2071 /*
2072 * Modify the WIMG settings of all mappings for a page.
2073 */
2074 void
moea64_page_set_memattr(vm_page_t m,vm_memattr_t ma)2075 moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2076 {
2077 struct pvo_entry *pvo;
2078 int64_t refchg;
2079 pmap_t pmap;
2080 uint64_t lo;
2081
2082 CTR3(KTR_PMAP, "%s: pa=%#jx, ma=%#x",
2083 __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma);
2084
2085 if (m->md.mdpg_cache_attrs == ma)
2086 return;
2087
2088 if ((m->oflags & VPO_UNMANAGED) != 0) {
2089 m->md.mdpg_cache_attrs = ma;
2090 return;
2091 }
2092
2093 lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma);
2094
2095 PV_PAGE_WR_LOCK(m);
2096 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2097 pmap = pvo->pvo_pmap;
2098 PMAP_LOCK(pmap);
2099 if (!(pvo->pvo_vaddr & PVO_DEAD)) {
2100 if (PVO_IS_SP(pvo)) {
2101 CTR1(KTR_PMAP,
2102 "%s: demote before set_memattr", __func__);
2103 moea64_sp_demote(pvo);
2104 }
2105 pvo->pvo_pte.pa &= ~LPTE_WIMG;
2106 pvo->pvo_pte.pa |= lo;
2107 refchg = moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
2108 if (refchg < 0)
2109 refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ?
2110 LPTE_CHG : 0;
2111 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
2112 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2113 refchg |=
2114 atomic_readandclear_32(&m->md.mdpg_attrs);
2115 if (refchg & LPTE_CHG)
2116 vm_page_dirty(m);
2117 if (refchg & LPTE_REF)
2118 vm_page_aflag_set(m, PGA_REFERENCED);
2119 }
2120 if (pvo->pvo_pmap == kernel_pmap)
2121 isync();
2122 }
2123 PMAP_UNLOCK(pmap);
2124 }
2125 m->md.mdpg_cache_attrs = ma;
2126 PV_PAGE_UNLOCK(m);
2127 }
2128
2129 /*
2130 * Map a wired page into kernel virtual address space.
2131 */
2132 void
moea64_kenter_attr(vm_offset_t va,vm_paddr_t pa,vm_memattr_t ma)2133 moea64_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
2134 {
2135 int error;
2136 struct pvo_entry *pvo, *oldpvo;
2137
2138 do {
2139 pvo = alloc_pvo_entry(0);
2140 if (pvo == NULL)
2141 vm_wait(NULL);
2142 } while (pvo == NULL);
2143 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
2144 pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma);
2145 pvo->pvo_vaddr |= PVO_WIRED;
2146
2147 PMAP_LOCK(kernel_pmap);
2148 oldpvo = moea64_pvo_find_va(kernel_pmap, va);
2149 if (oldpvo != NULL)
2150 moea64_pvo_remove_from_pmap(oldpvo);
2151 init_pvo_entry(pvo, kernel_pmap, va);
2152 error = moea64_pvo_enter(pvo, NULL, NULL);
2153 PMAP_UNLOCK(kernel_pmap);
2154
2155 /* Free any dead pages */
2156 if (oldpvo != NULL) {
2157 moea64_pvo_remove_from_page(oldpvo);
2158 free_pvo_entry(oldpvo);
2159 }
2160
2161 if (error != 0)
2162 panic("moea64_kenter: failed to enter va %#zx pa %#jx: %d", va,
2163 (uintmax_t)pa, error);
2164 }
2165
2166 void
moea64_kenter(vm_offset_t va,vm_paddr_t pa)2167 moea64_kenter(vm_offset_t va, vm_paddr_t pa)
2168 {
2169
2170 moea64_kenter_attr(va, pa, VM_MEMATTR_DEFAULT);
2171 }
2172
2173 /*
2174 * Extract the physical page address associated with the given kernel virtual
2175 * address.
2176 */
2177 vm_paddr_t
moea64_kextract(vm_offset_t va)2178 moea64_kextract(vm_offset_t va)
2179 {
2180 struct pvo_entry *pvo;
2181 vm_paddr_t pa;
2182
2183 /*
2184 * Shortcut the direct-mapped case when applicable. We never put
2185 * anything but 1:1 (or 62-bit aliased) mappings below
2186 * VM_MIN_KERNEL_ADDRESS.
2187 */
2188 if (va < VM_MIN_KERNEL_ADDRESS)
2189 return (va & ~DMAP_BASE_ADDRESS);
2190
2191 PMAP_LOCK(kernel_pmap);
2192 pvo = moea64_pvo_find_va(kernel_pmap, va);
2193 KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR,
2194 va));
2195 pa = PVO_PADDR(pvo) | (va - PVO_VADDR(pvo));
2196 PMAP_UNLOCK(kernel_pmap);
2197 return (pa);
2198 }
2199
2200 /*
2201 * Remove a wired page from kernel virtual address space.
2202 */
2203 void
moea64_kremove(vm_offset_t va)2204 moea64_kremove(vm_offset_t va)
2205 {
2206 moea64_remove(kernel_pmap, va, va + PAGE_SIZE);
2207 }
2208
2209 /*
2210 * Provide a kernel pointer corresponding to a given userland pointer.
2211 * The returned pointer is valid until the next time this function is
2212 * called in this thread. This is used internally in copyin/copyout.
2213 */
2214 static int
moea64_map_user_ptr(pmap_t pm,volatile const void * uaddr,void ** kaddr,size_t ulen,size_t * klen)2215 moea64_map_user_ptr(pmap_t pm, volatile const void *uaddr,
2216 void **kaddr, size_t ulen, size_t *klen)
2217 {
2218 size_t l;
2219 #ifdef __powerpc64__
2220 struct slb *slb;
2221 #endif
2222 register_t slbv;
2223
2224 *kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK);
2225 l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr);
2226 if (l > ulen)
2227 l = ulen;
2228 if (klen)
2229 *klen = l;
2230 else if (l != ulen)
2231 return (EFAULT);
2232
2233 #ifdef __powerpc64__
2234 /* Try lockless look-up first */
2235 slb = user_va_to_slb_entry(pm, (vm_offset_t)uaddr);
2236
2237 if (slb == NULL) {
2238 /* If it isn't there, we need to pre-fault the VSID */
2239 PMAP_LOCK(pm);
2240 slbv = va_to_vsid(pm, (vm_offset_t)uaddr) << SLBV_VSID_SHIFT;
2241 PMAP_UNLOCK(pm);
2242 } else {
2243 slbv = slb->slbv;
2244 }
2245
2246 /* Mark segment no-execute */
2247 slbv |= SLBV_N;
2248 #else
2249 slbv = va_to_vsid(pm, (vm_offset_t)uaddr);
2250
2251 /* Mark segment no-execute */
2252 slbv |= SR_N;
2253 #endif
2254
2255 /* If we have already set this VSID, we can just return */
2256 if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv)
2257 return (0);
2258
2259 __asm __volatile("isync");
2260 curthread->td_pcb->pcb_cpu.aim.usr_segm =
2261 (uintptr_t)uaddr >> ADDR_SR_SHFT;
2262 curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv;
2263 #ifdef __powerpc64__
2264 __asm __volatile ("slbie %0; slbmte %1, %2; isync" ::
2265 "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE));
2266 #else
2267 __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(slbv));
2268 #endif
2269
2270 return (0);
2271 }
2272
2273 /*
2274 * Figure out where a given kernel pointer (usually in a fault) points
2275 * to from the VM's perspective, potentially remapping into userland's
2276 * address space.
2277 */
2278 static int
moea64_decode_kernel_ptr(vm_offset_t addr,int * is_user,vm_offset_t * decoded_addr)2279 moea64_decode_kernel_ptr(vm_offset_t addr, int *is_user,
2280 vm_offset_t *decoded_addr)
2281 {
2282 vm_offset_t user_sr;
2283
2284 if ((addr >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) {
2285 user_sr = curthread->td_pcb->pcb_cpu.aim.usr_segm;
2286 addr &= ADDR_PIDX | ADDR_POFF;
2287 addr |= user_sr << ADDR_SR_SHFT;
2288 *decoded_addr = addr;
2289 *is_user = 1;
2290 } else {
2291 *decoded_addr = addr;
2292 *is_user = 0;
2293 }
2294
2295 return (0);
2296 }
2297
2298 /*
2299 * Map a range of physical addresses into kernel virtual address space.
2300 *
2301 * The value passed in *virt is a suggested virtual address for the mapping.
2302 * Architectures which can support a direct-mapped physical to virtual region
2303 * can return the appropriate address within that region, leaving '*virt'
2304 * unchanged. Other architectures should map the pages starting at '*virt' and
2305 * update '*virt' with the first usable address after the mapped region.
2306 */
2307 void *
moea64_map(vm_offset_t * virt,vm_paddr_t pa_start,vm_paddr_t pa_end,int prot)2308 moea64_map(vm_offset_t *virt, vm_paddr_t pa_start,
2309 vm_paddr_t pa_end, int prot)
2310 {
2311 vm_offset_t sva, va;
2312
2313 if (hw_direct_map) {
2314 /*
2315 * Check if every page in the region is covered by the direct
2316 * map. The direct map covers all of physical memory. Use
2317 * moea64_calc_wimg() as a shortcut to see if the page is in
2318 * physical memory as a way to see if the direct map covers it.
2319 */
2320 for (va = pa_start; va < pa_end; va += PAGE_SIZE)
2321 if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M)
2322 break;
2323 if (va == pa_end)
2324 return (PHYS_TO_DMAP(pa_start));
2325 }
2326 sva = *virt;
2327 va = sva;
2328 /* XXX respect prot argument */
2329 for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
2330 moea64_kenter(va, pa_start);
2331 *virt = va;
2332
2333 return ((void *)sva);
2334 }
2335
2336 /*
2337 * Returns true if the pmap's pv is one of the first
2338 * 16 pvs linked to from this page. This count may
2339 * be changed upwards or downwards in the future; it
2340 * is only necessary that true be returned for a small
2341 * subset of pmaps for proper page aging.
2342 */
2343 bool
moea64_page_exists_quick(pmap_t pmap,vm_page_t m)2344 moea64_page_exists_quick(pmap_t pmap, vm_page_t m)
2345 {
2346 int loops;
2347 struct pvo_entry *pvo;
2348 bool rv;
2349
2350 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2351 ("moea64_page_exists_quick: page %p is not managed", m));
2352 loops = 0;
2353 rv = false;
2354 PV_PAGE_RD_LOCK(m);
2355 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2356 if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) {
2357 rv = true;
2358 break;
2359 }
2360 if (++loops >= 16)
2361 break;
2362 }
2363 PV_PAGE_UNLOCK(m);
2364 return (rv);
2365 }
2366
2367 void
moea64_page_init(vm_page_t m)2368 moea64_page_init(vm_page_t m)
2369 {
2370
2371 m->md.mdpg_attrs = 0;
2372 m->md.mdpg_cache_attrs = VM_MEMATTR_DEFAULT;
2373 LIST_INIT(&m->md.mdpg_pvoh);
2374 }
2375
2376 /*
2377 * Return the number of managed mappings to the given physical page
2378 * that are wired.
2379 */
2380 int
moea64_page_wired_mappings(vm_page_t m)2381 moea64_page_wired_mappings(vm_page_t m)
2382 {
2383 struct pvo_entry *pvo;
2384 int count;
2385
2386 count = 0;
2387 if ((m->oflags & VPO_UNMANAGED) != 0)
2388 return (count);
2389 PV_PAGE_RD_LOCK(m);
2390 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
2391 if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED)
2392 count++;
2393 PV_PAGE_UNLOCK(m);
2394 return (count);
2395 }
2396
2397 static uintptr_t moea64_vsidcontext;
2398
2399 uintptr_t
moea64_get_unique_vsid(void)2400 moea64_get_unique_vsid(void) {
2401 u_int entropy;
2402 register_t hash;
2403 uint32_t mask;
2404 int i;
2405
2406 entropy = 0;
2407 __asm __volatile("mftb %0" : "=r"(entropy));
2408
2409 mtx_lock(&moea64_slb_mutex);
2410 for (i = 0; i < NVSIDS; i += VSID_NBPW) {
2411 u_int n;
2412
2413 /*
2414 * Create a new value by multiplying by a prime and adding in
2415 * entropy from the timebase register. This is to make the
2416 * VSID more random so that the PT hash function collides
2417 * less often. (Note that the prime casues gcc to do shifts
2418 * instead of a multiply.)
2419 */
2420 moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
2421 hash = moea64_vsidcontext & (NVSIDS - 1);
2422 if (hash == 0) /* 0 is special, avoid it */
2423 continue;
2424 n = hash >> 5;
2425 mask = 1 << (hash & (VSID_NBPW - 1));
2426 hash = (moea64_vsidcontext & VSID_HASHMASK);
2427 if (moea64_vsid_bitmap[n] & mask) { /* collision? */
2428 /* anything free in this bucket? */
2429 if (moea64_vsid_bitmap[n] == 0xffffffff) {
2430 entropy = (moea64_vsidcontext >> 20);
2431 continue;
2432 }
2433 i = ffs(~moea64_vsid_bitmap[n]) - 1;
2434 mask = 1 << i;
2435 hash &= rounddown2(VSID_HASHMASK, VSID_NBPW);
2436 hash |= i;
2437 }
2438 if (hash == VSID_VRMA) /* also special, avoid this too */
2439 continue;
2440 KASSERT(!(moea64_vsid_bitmap[n] & mask),
2441 ("Allocating in-use VSID %#zx\n", hash));
2442 moea64_vsid_bitmap[n] |= mask;
2443 mtx_unlock(&moea64_slb_mutex);
2444 return (hash);
2445 }
2446
2447 mtx_unlock(&moea64_slb_mutex);
2448 panic("%s: out of segments",__func__);
2449 }
2450
2451 #ifdef __powerpc64__
2452 int
moea64_pinit(pmap_t pmap)2453 moea64_pinit(pmap_t pmap)
2454 {
2455
2456 RB_INIT(&pmap->pmap_pvo);
2457
2458 pmap->pm_slb_tree_root = slb_alloc_tree();
2459 pmap->pm_slb = slb_alloc_user_cache();
2460 pmap->pm_slb_len = 0;
2461
2462 return (1);
2463 }
2464 #else
2465 int
moea64_pinit(pmap_t pmap)2466 moea64_pinit(pmap_t pmap)
2467 {
2468 int i;
2469 uint32_t hash;
2470
2471 RB_INIT(&pmap->pmap_pvo);
2472
2473 if (pmap_bootstrapped)
2474 pmap->pmap_phys = (pmap_t)moea64_kextract((vm_offset_t)pmap);
2475 else
2476 pmap->pmap_phys = pmap;
2477
2478 /*
2479 * Allocate some segment registers for this pmap.
2480 */
2481 hash = moea64_get_unique_vsid();
2482
2483 for (i = 0; i < 16; i++)
2484 pmap->pm_sr[i] = VSID_MAKE(i, hash);
2485
2486 KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0"));
2487
2488 return (1);
2489 }
2490 #endif
2491
2492 /*
2493 * Initialize the pmap associated with process 0.
2494 */
2495 void
moea64_pinit0(pmap_t pm)2496 moea64_pinit0(pmap_t pm)
2497 {
2498
2499 PMAP_LOCK_INIT(pm);
2500 moea64_pinit(pm);
2501 bzero(&pm->pm_stats, sizeof(pm->pm_stats));
2502 }
2503
2504 /*
2505 * Set the physical protection on the specified range of this map as requested.
2506 */
2507 static void
moea64_pvo_protect(pmap_t pm,struct pvo_entry * pvo,vm_prot_t prot)2508 moea64_pvo_protect( pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot)
2509 {
2510 struct vm_page *pg;
2511 vm_prot_t oldprot;
2512 int32_t refchg;
2513
2514 PMAP_LOCK_ASSERT(pm, MA_OWNED);
2515
2516 /*
2517 * Change the protection of the page.
2518 */
2519 oldprot = pvo->pvo_pte.prot;
2520 pvo->pvo_pte.prot = prot;
2521 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2522
2523 /*
2524 * If the PVO is in the page table, update mapping
2525 */
2526 refchg = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
2527 if (refchg < 0)
2528 refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0;
2529
2530 if (pm != kernel_pmap && pg != NULL &&
2531 (pg->a.flags & PGA_EXECUTABLE) == 0 &&
2532 (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
2533 if ((pg->oflags & VPO_UNMANAGED) == 0)
2534 vm_page_aflag_set(pg, PGA_EXECUTABLE);
2535 moea64_syncicache(pm, PVO_VADDR(pvo),
2536 PVO_PADDR(pvo), PAGE_SIZE);
2537 }
2538
2539 /*
2540 * Update vm about the REF/CHG bits if the page is managed and we have
2541 * removed write access.
2542 */
2543 if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) &&
2544 (oldprot & VM_PROT_WRITE)) {
2545 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2546 if (refchg & LPTE_CHG)
2547 vm_page_dirty(pg);
2548 if (refchg & LPTE_REF)
2549 vm_page_aflag_set(pg, PGA_REFERENCED);
2550 }
2551 }
2552
2553 void
moea64_protect(pmap_t pm,vm_offset_t sva,vm_offset_t eva,vm_prot_t prot)2554 moea64_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2555 vm_prot_t prot)
2556 {
2557 struct pvo_entry *pvo, key;
2558
2559 CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
2560 sva, eva, prot);
2561
2562 KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
2563 ("moea64_protect: non current pmap"));
2564
2565 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2566 moea64_remove(pm, sva, eva);
2567 return;
2568 }
2569
2570 PMAP_LOCK(pm);
2571 key.pvo_vaddr = sva;
2572 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2573 pvo != NULL && PVO_VADDR(pvo) < eva;
2574 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
2575 if (PVO_IS_SP(pvo)) {
2576 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
2577 pvo = moea64_sp_protect(pvo, prot);
2578 continue;
2579 } else {
2580 CTR1(KTR_PMAP, "%s: demote before protect",
2581 __func__);
2582 moea64_sp_demote(pvo);
2583 }
2584 }
2585 moea64_pvo_protect(pm, pvo, prot);
2586 }
2587 PMAP_UNLOCK(pm);
2588 }
2589
2590 /*
2591 * Map a list of wired pages into kernel virtual address space. This is
2592 * intended for temporary mappings which do not need page modification or
2593 * references recorded. Existing mappings in the region are overwritten.
2594 */
2595 void
moea64_qenter(void * sva,vm_page_t * m,int count)2596 moea64_qenter(void *sva, vm_page_t *m, int count)
2597 {
2598 vm_offset_t va;
2599
2600 va = (vm_offset_t)sva;
2601 while (count-- > 0) {
2602 moea64_kenter(va, VM_PAGE_TO_PHYS(*m));
2603 va += PAGE_SIZE;
2604 m++;
2605 }
2606 }
2607
2608 /*
2609 * Remove page mappings from kernel virtual address space. Intended for
2610 * temporary mappings entered by moea64_qenter.
2611 */
2612 void
moea64_qremove(void * sva,int count)2613 moea64_qremove(void *sva, int count)
2614 {
2615 vm_offset_t va;
2616
2617 va = (vm_offset_t)sva;
2618 while (count-- > 0) {
2619 moea64_kremove(va);
2620 va += PAGE_SIZE;
2621 }
2622 }
2623
2624 void
moea64_release_vsid(uint64_t vsid)2625 moea64_release_vsid(uint64_t vsid)
2626 {
2627 int idx, mask;
2628
2629 mtx_lock(&moea64_slb_mutex);
2630 idx = vsid & (NVSIDS-1);
2631 mask = 1 << (idx % VSID_NBPW);
2632 idx /= VSID_NBPW;
2633 KASSERT(moea64_vsid_bitmap[idx] & mask,
2634 ("Freeing unallocated VSID %#jx", vsid));
2635 moea64_vsid_bitmap[idx] &= ~mask;
2636 mtx_unlock(&moea64_slb_mutex);
2637 }
2638
2639 void
moea64_release(pmap_t pmap)2640 moea64_release(pmap_t pmap)
2641 {
2642
2643 /*
2644 * Free segment registers' VSIDs
2645 */
2646 #ifdef __powerpc64__
2647 slb_free_tree(pmap);
2648 slb_free_user_cache(pmap->pm_slb);
2649 #else
2650 KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));
2651
2652 moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
2653 #endif
2654 }
2655
2656 /*
2657 * Remove all pages mapped by the specified pmap
2658 */
2659 void
moea64_remove_pages(pmap_t pm)2660 moea64_remove_pages(pmap_t pm)
2661 {
2662 struct pvo_entry *pvo, *tpvo;
2663 struct pvo_dlist tofree;
2664
2665 SLIST_INIT(&tofree);
2666
2667 PMAP_LOCK(pm);
2668 RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
2669 if (pvo->pvo_vaddr & PVO_WIRED)
2670 continue;
2671
2672 /*
2673 * For locking reasons, remove this from the page table and
2674 * pmap, but save delinking from the vm_page for a second
2675 * pass
2676 */
2677 moea64_pvo_remove_from_pmap(pvo);
2678 SLIST_INSERT_HEAD(&tofree, pvo, pvo_dlink);
2679 }
2680 PMAP_UNLOCK(pm);
2681
2682 while (!SLIST_EMPTY(&tofree)) {
2683 pvo = SLIST_FIRST(&tofree);
2684 SLIST_REMOVE_HEAD(&tofree, pvo_dlink);
2685 moea64_pvo_remove_from_page(pvo);
2686 free_pvo_entry(pvo);
2687 }
2688 }
2689
2690 static void
moea64_remove_locked(pmap_t pm,vm_offset_t sva,vm_offset_t eva,struct pvo_dlist * tofree)2691 moea64_remove_locked(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2692 struct pvo_dlist *tofree)
2693 {
2694 struct pvo_entry *pvo, *tpvo, key;
2695
2696 PMAP_LOCK_ASSERT(pm, MA_OWNED);
2697
2698 key.pvo_vaddr = sva;
2699 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2700 pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
2701 if (PVO_IS_SP(pvo)) {
2702 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
2703 tpvo = moea64_sp_remove(pvo, tofree);
2704 continue;
2705 } else {
2706 CTR1(KTR_PMAP, "%s: demote before remove",
2707 __func__);
2708 moea64_sp_demote(pvo);
2709 }
2710 }
2711 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
2712
2713 /*
2714 * For locking reasons, remove this from the page table and
2715 * pmap, but save delinking from the vm_page for a second
2716 * pass
2717 */
2718 moea64_pvo_remove_from_pmap(pvo);
2719 SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
2720 }
2721 }
2722
2723 /*
2724 * Remove the given range of addresses from the specified map.
2725 */
2726 void
moea64_remove(pmap_t pm,vm_offset_t sva,vm_offset_t eva)2727 moea64_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
2728 {
2729 struct pvo_entry *pvo;
2730 struct pvo_dlist tofree;
2731
2732 /*
2733 * Perform an unsynchronized read. This is, however, safe.
2734 */
2735 if (pm->pm_stats.resident_count == 0)
2736 return;
2737
2738 SLIST_INIT(&tofree);
2739 PMAP_LOCK(pm);
2740 moea64_remove_locked(pm, sva, eva, &tofree);
2741 PMAP_UNLOCK(pm);
2742
2743 while (!SLIST_EMPTY(&tofree)) {
2744 pvo = SLIST_FIRST(&tofree);
2745 SLIST_REMOVE_HEAD(&tofree, pvo_dlink);
2746 moea64_pvo_remove_from_page(pvo);
2747 free_pvo_entry(pvo);
2748 }
2749 }
2750
2751 /*
2752 * Remove physical page from all pmaps in which it resides. moea64_pvo_remove()
2753 * will reflect changes in pte's back to the vm_page.
2754 */
2755 void
moea64_remove_all(vm_page_t m)2756 moea64_remove_all(vm_page_t m)
2757 {
2758 struct pvo_entry *pvo, *next_pvo;
2759 struct pvo_head freequeue;
2760 int wasdead;
2761 pmap_t pmap;
2762
2763 LIST_INIT(&freequeue);
2764
2765 PV_PAGE_WR_LOCK(m);
2766 LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) {
2767 pmap = pvo->pvo_pmap;
2768 PMAP_LOCK(pmap);
2769 wasdead = (pvo->pvo_vaddr & PVO_DEAD);
2770 if (!wasdead) {
2771 if (PVO_IS_SP(pvo)) {
2772 CTR1(KTR_PMAP, "%s: demote before remove_all",
2773 __func__);
2774 moea64_sp_demote(pvo);
2775 }
2776 moea64_pvo_remove_from_pmap(pvo);
2777 }
2778 moea64_pvo_remove_from_page_locked(pvo, m);
2779 if (!wasdead)
2780 LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink);
2781 PMAP_UNLOCK(pmap);
2782
2783 }
2784 KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings"));
2785 KASSERT((m->a.flags & PGA_WRITEABLE) == 0, ("Page still writable"));
2786 PV_PAGE_UNLOCK(m);
2787
2788 /* Clean up UMA allocations */
2789 LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo)
2790 free_pvo_entry(pvo);
2791 }
2792
2793 /*
2794 * Allocate a physical page of memory directly from the phys_avail map.
2795 * Can only be called from moea64_bootstrap before avail start and end are
2796 * calculated.
2797 */
2798 vm_paddr_t
moea64_bootstrap_alloc(vm_size_t size,vm_size_t align)2799 moea64_bootstrap_alloc(vm_size_t size, vm_size_t align)
2800 {
2801 vm_paddr_t s, e;
2802 int i, j;
2803
2804 size = round_page(size);
2805 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
2806 if (align != 0)
2807 s = roundup2(phys_avail[i], align);
2808 else
2809 s = phys_avail[i];
2810 e = s + size;
2811
2812 if (s < phys_avail[i] || e > phys_avail[i + 1])
2813 continue;
2814
2815 if (s + size > platform_real_maxaddr())
2816 continue;
2817
2818 if (s == phys_avail[i]) {
2819 phys_avail[i] += size;
2820 } else if (e == phys_avail[i + 1]) {
2821 phys_avail[i + 1] -= size;
2822 } else {
2823 for (j = phys_avail_count * 2; j > i; j -= 2) {
2824 phys_avail[j] = phys_avail[j - 2];
2825 phys_avail[j + 1] = phys_avail[j - 1];
2826 }
2827
2828 phys_avail[i + 3] = phys_avail[i + 1];
2829 phys_avail[i + 1] = s;
2830 phys_avail[i + 2] = e;
2831 phys_avail_count++;
2832 }
2833
2834 return (s);
2835 }
2836 panic("moea64_bootstrap_alloc: could not allocate memory");
2837 }
2838
2839 static int
moea64_pvo_enter(struct pvo_entry * pvo,struct pvo_head * pvo_head,struct pvo_entry ** oldpvop)2840 moea64_pvo_enter(struct pvo_entry *pvo, struct pvo_head *pvo_head,
2841 struct pvo_entry **oldpvop)
2842 {
2843 struct pvo_entry *old_pvo;
2844 int err;
2845
2846 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2847
2848 STAT_MOEA64(moea64_pvo_enter_calls++);
2849
2850 /*
2851 * Add to pmap list
2852 */
2853 old_pvo = RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2854
2855 if (old_pvo != NULL) {
2856 if (oldpvop != NULL)
2857 *oldpvop = old_pvo;
2858 return (EEXIST);
2859 }
2860
2861 if (pvo_head != NULL) {
2862 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
2863 }
2864
2865 if (pvo->pvo_vaddr & PVO_WIRED)
2866 pvo->pvo_pmap->pm_stats.wired_count++;
2867 pvo->pvo_pmap->pm_stats.resident_count++;
2868
2869 /*
2870 * Insert it into the hardware page table
2871 */
2872 err = moea64_pte_insert(pvo);
2873 if (err != 0) {
2874 panic("moea64_pvo_enter: overflow");
2875 }
2876
2877 STAT_MOEA64(moea64_pvo_entries++);
2878
2879 if (pvo->pvo_pmap == kernel_pmap)
2880 isync();
2881
2882 #ifdef __powerpc64__
2883 /*
2884 * Make sure all our bootstrap mappings are in the SLB as soon
2885 * as virtual memory is switched on.
2886 */
2887 if (!pmap_bootstrapped)
2888 moea64_bootstrap_slb_prefault(PVO_VADDR(pvo),
2889 pvo->pvo_vaddr & PVO_LARGE);
2890 #endif
2891
2892 return (0);
2893 }
2894
2895 static void
moea64_pvo_remove_from_pmap(struct pvo_entry * pvo)2896 moea64_pvo_remove_from_pmap(struct pvo_entry *pvo)
2897 {
2898 struct vm_page *pg;
2899 int32_t refchg;
2900
2901 KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap"));
2902 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2903 KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO"));
2904
2905 /*
2906 * If there is an active pte entry, we need to deactivate it
2907 */
2908 refchg = moea64_pte_unset(pvo);
2909 if (refchg < 0) {
2910 /*
2911 * If it was evicted from the page table, be pessimistic and
2912 * dirty the page.
2913 */
2914 if (pvo->pvo_pte.prot & VM_PROT_WRITE)
2915 refchg = LPTE_CHG;
2916 else
2917 refchg = 0;
2918 }
2919
2920 /*
2921 * Update our statistics.
2922 */
2923 pvo->pvo_pmap->pm_stats.resident_count--;
2924 if (pvo->pvo_vaddr & PVO_WIRED)
2925 pvo->pvo_pmap->pm_stats.wired_count--;
2926
2927 /*
2928 * Remove this PVO from the pmap list.
2929 */
2930 RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2931
2932 /*
2933 * Mark this for the next sweep
2934 */
2935 pvo->pvo_vaddr |= PVO_DEAD;
2936
2937 /* Send RC bits to VM */
2938 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
2939 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2940 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2941 if (pg != NULL) {
2942 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2943 if (refchg & LPTE_CHG)
2944 vm_page_dirty(pg);
2945 if (refchg & LPTE_REF)
2946 vm_page_aflag_set(pg, PGA_REFERENCED);
2947 }
2948 }
2949 }
2950
2951 static inline void
moea64_pvo_remove_from_page_locked(struct pvo_entry * pvo,vm_page_t m)2952 moea64_pvo_remove_from_page_locked(struct pvo_entry *pvo,
2953 vm_page_t m)
2954 {
2955
2956 KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page"));
2957
2958 /* Use NULL pmaps as a sentinel for races in page deletion */
2959 if (pvo->pvo_pmap == NULL)
2960 return;
2961 pvo->pvo_pmap = NULL;
2962
2963 /*
2964 * Update vm about page writeability/executability if managed
2965 */
2966 PV_LOCKASSERT(PVO_PADDR(pvo));
2967 if (pvo->pvo_vaddr & PVO_MANAGED) {
2968 if (m != NULL) {
2969 LIST_REMOVE(pvo, pvo_vlink);
2970 if (LIST_EMPTY(vm_page_to_pvoh(m)))
2971 vm_page_aflag_clear(m,
2972 PGA_WRITEABLE | PGA_EXECUTABLE);
2973 }
2974 }
2975
2976 STAT_MOEA64(moea64_pvo_entries--);
2977 STAT_MOEA64(moea64_pvo_remove_calls++);
2978 }
2979
2980 static void
moea64_pvo_remove_from_page(struct pvo_entry * pvo)2981 moea64_pvo_remove_from_page(struct pvo_entry *pvo)
2982 {
2983 vm_page_t pg = NULL;
2984
2985 if (pvo->pvo_vaddr & PVO_MANAGED)
2986 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2987
2988 PV_WR_LOCK(PVO_PADDR(pvo));
2989 moea64_pvo_remove_from_page_locked(pvo, pg);
2990 PV_UNLOCK(PVO_PADDR(pvo));
2991 }
2992
2993 static struct pvo_entry *
moea64_pvo_find_va(pmap_t pm,vm_offset_t va)2994 moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
2995 {
2996 struct pvo_entry key;
2997
2998 PMAP_LOCK_ASSERT(pm, MA_OWNED);
2999
3000 key.pvo_vaddr = va & ~ADDR_POFF;
3001 return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key));
3002 }
3003
3004 static bool
moea64_query_bit(vm_page_t m,uint64_t ptebit)3005 moea64_query_bit(vm_page_t m, uint64_t ptebit)
3006 {
3007 struct pvo_entry *pvo;
3008 int64_t ret;
3009 bool rv;
3010 vm_page_t sp;
3011
3012 /*
3013 * See if this bit is stored in the page already.
3014 *
3015 * For superpages, the bit is stored in the first vm page.
3016 */
3017 if ((m->md.mdpg_attrs & ptebit) != 0 ||
3018 ((sp = PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK)) != NULL &&
3019 (sp->md.mdpg_attrs & (ptebit | MDPG_ATTR_SP)) ==
3020 (ptebit | MDPG_ATTR_SP)))
3021 return (true);
3022
3023 /*
3024 * Examine each PTE. Sync so that any pending REF/CHG bits are
3025 * flushed to the PTEs.
3026 */
3027 rv = false;
3028 powerpc_sync();
3029 PV_PAGE_RD_LOCK(m);
3030 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
3031 if (PVO_IS_SP(pvo)) {
3032 ret = moea64_sp_query(pvo, ptebit);
3033 /*
3034 * If SP was not demoted, check its REF/CHG bits here.
3035 */
3036 if (ret != -1) {
3037 if ((ret & ptebit) != 0) {
3038 rv = true;
3039 break;
3040 }
3041 continue;
3042 }
3043 /* else, fallthrough */
3044 }
3045
3046 ret = 0;
3047
3048 /*
3049 * See if this pvo has a valid PTE. if so, fetch the
3050 * REF/CHG bits from the valid PTE. If the appropriate
3051 * ptebit is set, return success.
3052 */
3053 PMAP_LOCK(pvo->pvo_pmap);
3054 if (!(pvo->pvo_vaddr & PVO_DEAD))
3055 ret = moea64_pte_synch(pvo);
3056 PMAP_UNLOCK(pvo->pvo_pmap);
3057
3058 if (ret > 0) {
3059 atomic_set_32(&m->md.mdpg_attrs,
3060 ret & (LPTE_CHG | LPTE_REF));
3061 if (ret & ptebit) {
3062 rv = true;
3063 break;
3064 }
3065 }
3066 }
3067 PV_PAGE_UNLOCK(m);
3068
3069 return (rv);
3070 }
3071
3072 static u_int
moea64_clear_bit(vm_page_t m,u_int64_t ptebit)3073 moea64_clear_bit(vm_page_t m, u_int64_t ptebit)
3074 {
3075 u_int count;
3076 struct pvo_entry *pvo;
3077 int64_t ret;
3078
3079 /*
3080 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
3081 * we can reset the right ones).
3082 */
3083 powerpc_sync();
3084
3085 /*
3086 * For each pvo entry, clear the pte's ptebit.
3087 */
3088 count = 0;
3089 PV_PAGE_WR_LOCK(m);
3090 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
3091 if (PVO_IS_SP(pvo)) {
3092 if ((ret = moea64_sp_clear(pvo, m, ptebit)) != -1) {
3093 count += ret;
3094 continue;
3095 }
3096 }
3097 ret = 0;
3098
3099 PMAP_LOCK(pvo->pvo_pmap);
3100 if (!(pvo->pvo_vaddr & PVO_DEAD))
3101 ret = moea64_pte_clear(pvo, ptebit);
3102 PMAP_UNLOCK(pvo->pvo_pmap);
3103
3104 if (ret > 0 && (ret & ptebit))
3105 count++;
3106 }
3107 atomic_clear_32(&m->md.mdpg_attrs, ptebit);
3108 PV_PAGE_UNLOCK(m);
3109
3110 return (count);
3111 }
3112
3113 int
moea64_dev_direct_mapped(vm_paddr_t pa,vm_size_t size)3114 moea64_dev_direct_mapped(vm_paddr_t pa, vm_size_t size)
3115 {
3116 struct pvo_entry *pvo, key;
3117 vm_offset_t ppa;
3118 int error = 0;
3119
3120 if (hw_direct_map && mem_valid(pa, size) == 0)
3121 return (0);
3122
3123 PMAP_LOCK(kernel_pmap);
3124 ppa = pa & ~ADDR_POFF;
3125 key.pvo_vaddr = DMAP_BASE_ADDRESS + ppa;
3126 for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key);
3127 ppa < pa + size; ppa += PAGE_SIZE,
3128 pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) {
3129 if (pvo == NULL || PVO_PADDR(pvo) != ppa) {
3130 error = EFAULT;
3131 break;
3132 }
3133 }
3134 PMAP_UNLOCK(kernel_pmap);
3135
3136 return (error);
3137 }
3138
3139 /*
3140 * Map a set of physical memory pages into the kernel virtual
3141 * address space. Return a pointer to where it is mapped. This
3142 * routine is intended to be used for mapping device memory,
3143 * NOT real memory.
3144 */
3145 void *
moea64_mapdev_attr(vm_paddr_t pa,vm_size_t size,vm_memattr_t ma)3146 moea64_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
3147 {
3148 char *va;
3149 vm_offset_t tmpva, ppa, offset;
3150
3151 ppa = trunc_page(pa);
3152 offset = pa & PAGE_MASK;
3153 size = roundup2(offset + size, PAGE_SIZE);
3154
3155 va = kva_alloc(size);
3156
3157 if (va == NULL)
3158 panic("moea64_mapdev: Couldn't alloc kernel virtual memory");
3159
3160 for (tmpva = (vm_offset_t)va; size > 0;) {
3161 moea64_kenter_attr(tmpva, ppa, ma);
3162 size -= PAGE_SIZE;
3163 tmpva += PAGE_SIZE;
3164 ppa += PAGE_SIZE;
3165 }
3166
3167 return (va + offset);
3168 }
3169
3170 void *
moea64_mapdev(vm_paddr_t pa,vm_size_t size)3171 moea64_mapdev(vm_paddr_t pa, vm_size_t size)
3172 {
3173
3174 return moea64_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT);
3175 }
3176
3177 void
moea64_unmapdev(void * va,vm_size_t size)3178 moea64_unmapdev(void *va, vm_size_t size)
3179 {
3180 void *base;
3181 vm_offset_t offset;
3182
3183 base = trunc_page(va);
3184 offset = (vm_offset_t)va & PAGE_MASK;
3185 size = roundup2(offset + size, PAGE_SIZE);
3186
3187 moea64_qremove(base, atop(size));
3188 kva_free(base, size);
3189 }
3190
3191 void
moea64_sync_icache(pmap_t pm,vm_offset_t va,vm_size_t sz)3192 moea64_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3193 {
3194 struct pvo_entry *pvo;
3195 vm_offset_t lim;
3196 vm_paddr_t pa;
3197 vm_size_t len;
3198
3199 if (__predict_false(pm == NULL))
3200 pm = &curthread->td_proc->p_vmspace->vm_pmap;
3201
3202 PMAP_LOCK(pm);
3203 while (sz > 0) {
3204 lim = round_page(va+1);
3205 len = MIN(lim - va, sz);
3206 pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
3207 if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) {
3208 pa = PVO_PADDR(pvo) | (va & ADDR_POFF);
3209 moea64_syncicache(pm, va, pa, len);
3210 }
3211 va += len;
3212 sz -= len;
3213 }
3214 PMAP_UNLOCK(pm);
3215 }
3216
3217 void
moea64_dumpsys_map(vm_paddr_t pa,size_t sz,void ** va)3218 moea64_dumpsys_map(vm_paddr_t pa, size_t sz, void **va)
3219 {
3220
3221 *va = (void *)(uintptr_t)pa;
3222 }
3223
3224 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
3225
3226 void
moea64_scan_init(void)3227 moea64_scan_init(void)
3228 {
3229 struct pvo_entry *pvo;
3230 vm_offset_t va;
3231 int i;
3232
3233 if (!do_minidump) {
3234 /* Initialize phys. segments for dumpsys(). */
3235 memset(&dump_map, 0, sizeof(dump_map));
3236 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz);
3237 for (i = 0; i < pregions_sz; i++) {
3238 dump_map[i].pa_start = pregions[i].mr_start;
3239 dump_map[i].pa_size = pregions[i].mr_size;
3240 }
3241 return;
3242 }
3243
3244 /* Virtual segments for minidumps: */
3245 memset(&dump_map, 0, sizeof(dump_map));
3246
3247 /* 1st: kernel .data and .bss. */
3248 dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
3249 dump_map[0].pa_size = round_page((uintptr_t)_end) -
3250 dump_map[0].pa_start;
3251
3252 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */
3253 dump_map[1].pa_start = (vm_paddr_t)(uintptr_t)msgbufp->msg_ptr;
3254 dump_map[1].pa_size = round_page(msgbufp->msg_size);
3255
3256 /* 3rd: kernel VM. */
3257 va = dump_map[1].pa_start + dump_map[1].pa_size;
3258 /* Find start of next chunk (from va). */
3259 while (va < virtual_end) {
3260 /* Don't dump the buffer cache. */
3261 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
3262 va = kmi.buffer_eva;
3263 continue;
3264 }
3265 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
3266 if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD))
3267 break;
3268 va += PAGE_SIZE;
3269 }
3270 if (va < virtual_end) {
3271 dump_map[2].pa_start = va;
3272 va += PAGE_SIZE;
3273 /* Find last page in chunk. */
3274 while (va < virtual_end) {
3275 /* Don't run into the buffer cache. */
3276 if (va == kmi.buffer_sva)
3277 break;
3278 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
3279 if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD))
3280 break;
3281 va += PAGE_SIZE;
3282 }
3283 dump_map[2].pa_size = va - dump_map[2].pa_start;
3284 }
3285 }
3286
3287 #ifdef __powerpc64__
3288
3289 static size_t
moea64_scan_pmap(struct bitset * dump_bitset)3290 moea64_scan_pmap(struct bitset *dump_bitset)
3291 {
3292 struct pvo_entry *pvo;
3293 vm_paddr_t pa, pa_end;
3294 vm_offset_t va, pgva, kstart, kend, kstart_lp, kend_lp;
3295 uint64_t lpsize;
3296
3297 lpsize = moea64_large_page_size;
3298 kstart = trunc_page((vm_offset_t)_etext);
3299 kend = round_page((vm_offset_t)_end);
3300 kstart_lp = kstart & ~moea64_large_page_mask;
3301 kend_lp = (kend + moea64_large_page_mask) & ~moea64_large_page_mask;
3302
3303 CTR4(KTR_PMAP, "moea64_scan_pmap: kstart=0x%016lx, kend=0x%016lx, "
3304 "kstart_lp=0x%016lx, kend_lp=0x%016lx",
3305 kstart, kend, kstart_lp, kend_lp);
3306
3307 PMAP_LOCK(kernel_pmap);
3308 RB_FOREACH(pvo, pvo_tree, &kernel_pmap->pmap_pvo) {
3309 va = pvo->pvo_vaddr;
3310
3311 if (va & PVO_DEAD)
3312 continue;
3313
3314 /* Skip DMAP (except kernel area) */
3315 if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) {
3316 if (va & PVO_LARGE) {
3317 pgva = va & ~moea64_large_page_mask;
3318 if (pgva < kstart_lp || pgva >= kend_lp)
3319 continue;
3320 } else {
3321 pgva = trunc_page(va);
3322 if (pgva < kstart || pgva >= kend)
3323 continue;
3324 }
3325 }
3326
3327 pa = PVO_PADDR(pvo);
3328
3329 if (va & PVO_LARGE) {
3330 pa_end = pa + lpsize;
3331 for (; pa < pa_end; pa += PAGE_SIZE) {
3332 if (vm_phys_is_dumpable(pa))
3333 vm_page_dump_add(dump_bitset, pa);
3334 }
3335 } else {
3336 if (vm_phys_is_dumpable(pa))
3337 vm_page_dump_add(dump_bitset, pa);
3338 }
3339 }
3340 PMAP_UNLOCK(kernel_pmap);
3341
3342 return (sizeof(struct lpte) * moea64_pteg_count * 8);
3343 }
3344
3345 static struct dump_context dump_ctx;
3346
3347 static void *
moea64_dump_pmap_init(unsigned blkpgs)3348 moea64_dump_pmap_init(unsigned blkpgs)
3349 {
3350 dump_ctx.ptex = 0;
3351 dump_ctx.ptex_end = moea64_pteg_count * 8;
3352 dump_ctx.blksz = blkpgs * PAGE_SIZE;
3353 return (&dump_ctx);
3354 }
3355
3356 #else
3357
3358 static size_t
moea64_scan_pmap(struct bitset * dump_bitset __unused)3359 moea64_scan_pmap(struct bitset *dump_bitset __unused)
3360 {
3361 return (0);
3362 }
3363
3364 static void *
moea64_dump_pmap_init(unsigned blkpgs)3365 moea64_dump_pmap_init(unsigned blkpgs)
3366 {
3367 return (NULL);
3368 }
3369
3370 #endif
3371
3372 #ifdef __powerpc64__
3373 static void
moea64_map_range(vm_offset_t va,vm_paddr_t pa,vm_size_t npages)3374 moea64_map_range(vm_offset_t va, vm_paddr_t pa, vm_size_t npages)
3375 {
3376
3377 for (; npages > 0; --npages) {
3378 if (moea64_large_page_size != 0 &&
3379 (pa & moea64_large_page_mask) == 0 &&
3380 (va & moea64_large_page_mask) == 0 &&
3381 npages >= (moea64_large_page_size >> PAGE_SHIFT)) {
3382 PMAP_LOCK(kernel_pmap);
3383 moea64_kenter_large(va, pa, 0, 0);
3384 PMAP_UNLOCK(kernel_pmap);
3385 pa += moea64_large_page_size;
3386 va += moea64_large_page_size;
3387 npages -= (moea64_large_page_size >> PAGE_SHIFT) - 1;
3388 } else {
3389 moea64_kenter(va, pa);
3390 pa += PAGE_SIZE;
3391 va += PAGE_SIZE;
3392 }
3393 }
3394 }
3395
3396 static void
moea64_page_array_startup(long pages)3397 moea64_page_array_startup(long pages)
3398 {
3399 long dom_pages[MAXMEMDOM];
3400 vm_paddr_t pa;
3401 vm_offset_t va, vm_page_base;
3402 vm_size_t needed, size;
3403 int domain;
3404 int i;
3405
3406 vm_page_base = 0xd000000000000000ULL;
3407
3408 /* Short-circuit single-domain systems. */
3409 if (vm_ndomains == 1) {
3410 size = round_page(pages * sizeof(struct vm_page));
3411 pa = vm_phys_early_alloc(0, size);
3412 vm_page_array = moea64_map(&vm_page_base,
3413 pa, pa + size, VM_PROT_READ | VM_PROT_WRITE);
3414 vm_page_array_size = pages;
3415 return;
3416 }
3417
3418 for (i = 0; i < MAXMEMDOM; i++)
3419 dom_pages[i] = 0;
3420
3421 /* Now get the number of pages required per domain. */
3422 for (i = 0; i < vm_phys_nsegs; i++) {
3423 domain = vm_phys_segs[i].domain;
3424 KASSERT(domain < MAXMEMDOM,
3425 ("Invalid vm_phys_segs NUMA domain %d!\n", domain));
3426 /* Get size of vm_page_array needed for this segment. */
3427 size = btoc(vm_phys_segs[i].end - vm_phys_segs[i].start);
3428 dom_pages[domain] += size;
3429 }
3430
3431 for (i = 0; phys_avail[i + 1] != 0; i+= 2) {
3432 domain = vm_phys_domain(phys_avail[i]);
3433 KASSERT(domain < MAXMEMDOM,
3434 ("Invalid phys_avail NUMA domain %d!\n", domain));
3435 size = btoc(phys_avail[i + 1] - phys_avail[i]);
3436 dom_pages[domain] += size;
3437 }
3438
3439 /*
3440 * Map in chunks that can get us all 16MB pages. There will be some
3441 * overlap between domains, but that's acceptable for now.
3442 */
3443 vm_page_array_size = 0;
3444 va = vm_page_base;
3445 for (i = 0; i < MAXMEMDOM && vm_page_array_size < pages; i++) {
3446 if (dom_pages[i] == 0)
3447 continue;
3448 size = ulmin(pages - vm_page_array_size, dom_pages[i]);
3449 size = round_page(size * sizeof(struct vm_page));
3450 needed = size;
3451 size = roundup2(size, moea64_large_page_size);
3452 pa = vm_phys_early_alloc(i, size);
3453 vm_page_array_size += size / sizeof(struct vm_page);
3454 moea64_map_range(va, pa, size >> PAGE_SHIFT);
3455 /* Scoot up domain 0, to reduce the domain page overlap. */
3456 if (i == 0)
3457 vm_page_base += size - needed;
3458 va += size;
3459 }
3460 vm_page_array = (vm_page_t)vm_page_base;
3461 vm_page_array_size = pages;
3462 }
3463 #endif
3464
/*
 * Do-nothing method: takes no arguments and always reports 0.  Used as
 * the default for the PTE hooks instantiated with DEFINE_OEA64_IFUNC.
 */
static int64_t
moea64_null_method(void)
{
	const int64_t nothing = 0;

	return (nothing);
}
3470
moea64_pte_replace_default(struct pvo_entry * pvo,int flags)3471 static int64_t moea64_pte_replace_default(struct pvo_entry *pvo, int flags)
3472 {
3473 int64_t refchg;
3474
3475 refchg = moea64_pte_unset(pvo);
3476 moea64_pte_insert(pvo);
3477
3478 return (refchg);
3479 }
3480
struct moea64_funcs *moea64_ops;

/*
 * Define the ifunc resolver for moea64_<func>.  The resolver picks the
 * 'func' member of moea64_ops when moea64_ops is set and that member is
 * non-NULL; in every other case it picks 'def'.
 */
#define DEFINE_OEA64_IFUNC(ret, func, args, def)			\
	DEFINE_IFUNC(, ret, moea64_##func, args) {			\
		moea64_##func##_t impl;					\
									\
		impl = NULL;						\
		if (moea64_ops != NULL)					\
			impl = moea64_ops->func;			\
		if (impl == NULL)					\
			impl = (moea64_##func##_t)def;			\
		return (impl);						\
	}
3491
3492 void
moea64_install(void)3493 moea64_install(void)
3494 {
3495 #ifdef __powerpc64__
3496 if (hw_direct_map == -1) {
3497 moea64_probe_large_page();
3498
3499 /* Use a direct map if we have large page support */
3500 if (moea64_large_page_size > 0)
3501 hw_direct_map = 1;
3502 else
3503 hw_direct_map = 0;
3504 }
3505 #endif
3506
3507 /*
3508 * Default to non-DMAP, and switch over to DMAP functions once we know
3509 * we have DMAP.
3510 */
3511 if (hw_direct_map) {
3512 moea64_methods.quick_enter_page = moea64_quick_enter_page_dmap;
3513 moea64_methods.quick_remove_page = NULL;
3514 moea64_methods.copy_page = moea64_copy_page_dmap;
3515 moea64_methods.zero_page = moea64_zero_page_dmap;
3516 moea64_methods.copy_pages = moea64_copy_pages_dmap;
3517 }
3518 }
3519
3520 DEFINE_OEA64_IFUNC(int64_t, pte_replace, (struct pvo_entry *, int),
3521 moea64_pte_replace_default)
3522 DEFINE_OEA64_IFUNC(int64_t, pte_insert, (struct pvo_entry *), moea64_null_method)
3523 DEFINE_OEA64_IFUNC(int64_t, pte_unset, (struct pvo_entry *), moea64_null_method)
3524 DEFINE_OEA64_IFUNC(int64_t, pte_clear, (struct pvo_entry *, uint64_t),
3525 moea64_null_method)
3526 DEFINE_OEA64_IFUNC(int64_t, pte_synch, (struct pvo_entry *), moea64_null_method)
3527 DEFINE_OEA64_IFUNC(int64_t, pte_insert_sp, (struct pvo_entry *), moea64_null_method)
3528 DEFINE_OEA64_IFUNC(int64_t, pte_unset_sp, (struct pvo_entry *), moea64_null_method)
3529 DEFINE_OEA64_IFUNC(int64_t, pte_replace_sp, (struct pvo_entry *), moea64_null_method)
3530
3531 /* Superpage functions */
3532
3533 /* MMU interface */
3534
3535 static bool
moea64_ps_enabled(pmap_t pmap)3536 moea64_ps_enabled(pmap_t pmap)
3537 {
3538 return (superpages_enabled);
3539 }
3540
3541 static void
moea64_align_superpage(vm_object_t object,vm_ooffset_t offset,vm_offset_t * addr,vm_size_t size)3542 moea64_align_superpage(vm_object_t object, vm_ooffset_t offset,
3543 vm_offset_t *addr, vm_size_t size)
3544 {
3545 vm_offset_t sp_offset;
3546
3547 if (size < HPT_SP_SIZE)
3548 return;
3549
3550 CTR4(KTR_PMAP, "%s: offs=%#jx, addr=%p, size=%#jx",
3551 __func__, (uintmax_t)offset, addr, (uintmax_t)size);
3552
3553 if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3554 offset += ptoa(object->pg_color);
3555 sp_offset = offset & HPT_SP_MASK;
3556 if (size - ((HPT_SP_SIZE - sp_offset) & HPT_SP_MASK) < HPT_SP_SIZE ||
3557 (*addr & HPT_SP_MASK) == sp_offset)
3558 return;
3559 if ((*addr & HPT_SP_MASK) < sp_offset)
3560 *addr = (*addr & ~HPT_SP_MASK) + sp_offset;
3561 else
3562 *addr = ((*addr + HPT_SP_MASK) & ~HPT_SP_MASK) + sp_offset;
3563 }
3564
3565 /* Helpers */
3566
3567 static __inline void
moea64_pvo_cleanup(struct pvo_dlist * tofree)3568 moea64_pvo_cleanup(struct pvo_dlist *tofree)
3569 {
3570 struct pvo_entry *pvo;
3571
3572 /* clean up */
3573 while (!SLIST_EMPTY(tofree)) {
3574 pvo = SLIST_FIRST(tofree);
3575 SLIST_REMOVE_HEAD(tofree, pvo_dlink);
3576 if (pvo->pvo_vaddr & PVO_DEAD)
3577 moea64_pvo_remove_from_page(pvo);
3578 free_pvo_entry(pvo);
3579 }
3580 }
3581
3582 static __inline uint16_t
pvo_to_vmpage_flags(struct pvo_entry * pvo)3583 pvo_to_vmpage_flags(struct pvo_entry *pvo)
3584 {
3585 uint16_t flags;
3586
3587 flags = 0;
3588 if ((pvo->pvo_pte.prot & VM_PROT_WRITE) != 0)
3589 flags |= PGA_WRITEABLE;
3590 if ((pvo->pvo_pte.prot & VM_PROT_EXECUTE) != 0)
3591 flags |= PGA_EXECUTABLE;
3592
3593 return (flags);
3594 }
3595
3596 /*
3597 * Check if the given pvo and its superpage are in sva-eva range.
3598 */
3599 static __inline bool
moea64_sp_pvo_in_range(struct pvo_entry * pvo,vm_offset_t sva,vm_offset_t eva)3600 moea64_sp_pvo_in_range(struct pvo_entry *pvo, vm_offset_t sva, vm_offset_t eva)
3601 {
3602 vm_offset_t spva;
3603
3604 spva = PVO_VADDR(pvo) & ~HPT_SP_MASK;
3605 if (spva >= sva && spva + HPT_SP_SIZE <= eva) {
3606 /*
3607 * Because this function is intended to be called from loops
3608 * that iterate over ordered pvo entries, if the condition
3609 * above is true then the pvo must be the first of its
3610 * superpage.
3611 */
3612 KASSERT(PVO_VADDR(pvo) == spva,
3613 ("%s: unexpected unaligned superpage pvo", __func__));
3614 return (true);
3615 }
3616 return (false);
3617 }
3618
3619 /*
3620 * Update vm about the REF/CHG bits if the superpage is managed and
3621 * has (or had) write access.
3622 */
3623 static void
moea64_sp_refchg_process(struct pvo_entry * sp,vm_page_t m,int64_t sp_refchg,vm_prot_t prot)3624 moea64_sp_refchg_process(struct pvo_entry *sp, vm_page_t m,
3625 int64_t sp_refchg, vm_prot_t prot)
3626 {
3627 vm_page_t m_end;
3628 int64_t refchg;
3629
3630 if ((sp->pvo_vaddr & PVO_MANAGED) != 0 && (prot & VM_PROT_WRITE) != 0) {
3631 for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++) {
3632 refchg = sp_refchg |
3633 atomic_readandclear_32(&m->md.mdpg_attrs);
3634 if (refchg & LPTE_CHG)
3635 vm_page_dirty(m);
3636 if (refchg & LPTE_REF)
3637 vm_page_aflag_set(m, PGA_REFERENCED);
3638 }
3639 }
3640 }
3641
3642 /* Superpage ops */
3643
3644 static int
moea64_sp_enter(pmap_t pmap,vm_offset_t va,vm_page_t m,vm_prot_t prot,u_int flags,int8_t psind)3645 moea64_sp_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
3646 vm_prot_t prot, u_int flags, int8_t psind)
3647 {
3648 struct pvo_entry *pvo, **pvos;
3649 struct pvo_head *pvo_head;
3650 vm_offset_t sva;
3651 vm_page_t sm;
3652 vm_paddr_t pa, spa;
3653 bool sync;
3654 struct pvo_dlist tofree;
3655 int error __diagused, i;
3656 uint16_t aflags;
3657
3658 KASSERT((va & HPT_SP_MASK) == 0, ("%s: va %#jx unaligned",
3659 __func__, (uintmax_t)va));
3660 KASSERT(psind == 1, ("%s: invalid psind: %d", __func__, psind));
3661 KASSERT(m->psind == 1, ("%s: invalid m->psind: %d",
3662 __func__, m->psind));
3663 KASSERT(pmap != kernel_pmap,
3664 ("%s: function called with kernel pmap", __func__));
3665
3666 CTR5(KTR_PMAP, "%s: va=%#jx, pa=%#jx, prot=%#x, flags=%#x, psind=1",
3667 __func__, (uintmax_t)va, (uintmax_t)VM_PAGE_TO_PHYS(m),
3668 prot, flags);
3669
3670 SLIST_INIT(&tofree);
3671
3672 sva = va;
3673 sm = m;
3674 spa = pa = VM_PAGE_TO_PHYS(sm);
3675
3676 /* Try to allocate all PVOs first, to make failure handling easier. */
3677 pvos = malloc(HPT_SP_PAGES * sizeof(struct pvo_entry *), M_TEMP,
3678 M_NOWAIT);
3679 if (pvos == NULL) {
3680 CTR1(KTR_PMAP, "%s: failed to alloc pvo array", __func__);
3681 return (KERN_RESOURCE_SHORTAGE);
3682 }
3683
3684 for (i = 0; i < HPT_SP_PAGES; i++) {
3685 pvos[i] = alloc_pvo_entry(0);
3686 if (pvos[i] == NULL) {
3687 CTR1(KTR_PMAP, "%s: failed to alloc pvo", __func__);
3688 for (i = i - 1; i >= 0; i--)
3689 free_pvo_entry(pvos[i]);
3690 free(pvos, M_TEMP);
3691 return (KERN_RESOURCE_SHORTAGE);
3692 }
3693 }
3694
3695 PV_WR_LOCK(spa);
3696 PMAP_LOCK(pmap);
3697
3698 /* Note: moea64_remove_locked() also clears cached REF/CHG bits. */
3699 moea64_remove_locked(pmap, va, va + HPT_SP_SIZE, &tofree);
3700
3701 /* Enter pages */
3702 for (i = 0; i < HPT_SP_PAGES;
3703 i++, va += PAGE_SIZE, pa += PAGE_SIZE, m++) {
3704 pvo = pvos[i];
3705
3706 pvo->pvo_pte.prot = prot;
3707 pvo->pvo_pte.pa = (pa & ~HPT_SP_MASK) | LPTE_LP_4K_16M |
3708 moea64_calc_wimg(pa, pmap_page_get_memattr(m));
3709
3710 if ((flags & PMAP_ENTER_WIRED) != 0)
3711 pvo->pvo_vaddr |= PVO_WIRED;
3712 pvo->pvo_vaddr |= PVO_LARGE;
3713
3714 if ((m->oflags & VPO_UNMANAGED) != 0)
3715 pvo_head = NULL;
3716 else {
3717 pvo_head = &m->md.mdpg_pvoh;
3718 pvo->pvo_vaddr |= PVO_MANAGED;
3719 }
3720
3721 init_pvo_entry(pvo, pmap, va);
3722
3723 error = moea64_pvo_enter(pvo, pvo_head, NULL);
3724 /*
3725 * All superpage PVOs were previously removed, so no errors
3726 * should occur while inserting the new ones.
3727 */
3728 KASSERT(error == 0, ("%s: unexpected error "
3729 "when inserting superpage PVO: %d",
3730 __func__, error));
3731 }
3732
3733 PMAP_UNLOCK(pmap);
3734 PV_UNLOCK(spa);
3735
3736 sync = (sm->a.flags & PGA_EXECUTABLE) == 0;
3737 /* Note: moea64_pvo_cleanup() also clears page prot. flags. */
3738 moea64_pvo_cleanup(&tofree);
3739 pvo = pvos[0];
3740
3741 /* Set vm page flags */
3742 aflags = pvo_to_vmpage_flags(pvo);
3743 if (aflags != 0)
3744 for (m = sm; m < &sm[HPT_SP_PAGES]; m++)
3745 vm_page_aflag_set(m, aflags);
3746
3747 /*
3748 * Flush the page from the instruction cache if this page is
3749 * mapped executable and cacheable.
3750 */
3751 if (sync && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0)
3752 moea64_syncicache(pmap, sva, spa, HPT_SP_SIZE);
3753
3754 atomic_add_long(&sp_mappings, 1);
3755 CTR3(KTR_PMAP, "%s: SP success for va %#jx in pmap %p",
3756 __func__, (uintmax_t)sva, pmap);
3757
3758 free(pvos, M_TEMP);
3759 return (KERN_SUCCESS);
3760 }
3761
3762 #if VM_NRESERVLEVEL > 0
3763 static void
moea64_sp_promote(pmap_t pmap,vm_offset_t va,vm_page_t m)3764 moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m)
3765 {
3766 struct pvo_entry *first, *pvo;
3767 vm_paddr_t pa, pa_end;
3768 vm_offset_t sva, va_end;
3769 int64_t sp_refchg;
3770
3771 /* This CTR may generate a lot of output. */
3772 /* CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)va); */
3773
3774 va &= ~HPT_SP_MASK;
3775 sva = va;
3776 /* Get superpage */
3777 pa = VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK;
3778 m = PHYS_TO_VM_PAGE(pa);
3779
3780 PMAP_LOCK(pmap);
3781
3782 /*
3783 * Check if all pages meet promotion criteria.
3784 *
3785 * XXX In some cases the loop below may be executed for each or most
3786 * of the entered pages of a superpage, which can be expensive
3787 * (although it was not profiled) and need some optimization.
3788 *
3789 * Some cases where this seems to happen are:
3790 * - When a superpage is first entered read-only and later becomes
3791 * read-write.
3792 * - When some of the superpage's virtual addresses map to previously
3793 * wired/cached pages while others map to pages allocated from a
3794 * different physical address range. A common scenario where this
3795 * happens is when mmap'ing a file that is already present in FS
3796 * block cache and doesn't fill a superpage.
3797 */
3798 first = pvo = moea64_pvo_find_va(pmap, sva);
3799 for (pa_end = pa + HPT_SP_SIZE;
3800 pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE) {
3801 if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
3802 CTR3(KTR_PMAP,
3803 "%s: NULL or dead PVO: pmap=%p, va=%#jx",
3804 __func__, pmap, (uintmax_t)va);
3805 goto error;
3806 }
3807 if (PVO_PADDR(pvo) != pa) {
3808 CTR5(KTR_PMAP, "%s: PAs don't match: "
3809 "pmap=%p, va=%#jx, pvo_pa=%#jx, exp_pa=%#jx",
3810 __func__, pmap, (uintmax_t)va,
3811 (uintmax_t)PVO_PADDR(pvo), (uintmax_t)pa);
3812 atomic_add_long(&sp_p_fail_pa, 1);
3813 goto error;
3814 }
3815 if ((first->pvo_vaddr & PVO_FLAGS_PROMOTE) !=
3816 (pvo->pvo_vaddr & PVO_FLAGS_PROMOTE)) {
3817 CTR5(KTR_PMAP, "%s: PVO flags don't match: "
3818 "pmap=%p, va=%#jx, pvo_flags=%#jx, exp_flags=%#jx",
3819 __func__, pmap, (uintmax_t)va,
3820 (uintmax_t)(pvo->pvo_vaddr & PVO_FLAGS_PROMOTE),
3821 (uintmax_t)(first->pvo_vaddr & PVO_FLAGS_PROMOTE));
3822 atomic_add_long(&sp_p_fail_flags, 1);
3823 goto error;
3824 }
3825 if (first->pvo_pte.prot != pvo->pvo_pte.prot) {
3826 CTR5(KTR_PMAP, "%s: PVO protections don't match: "
3827 "pmap=%p, va=%#jx, pvo_prot=%#x, exp_prot=%#x",
3828 __func__, pmap, (uintmax_t)va,
3829 pvo->pvo_pte.prot, first->pvo_pte.prot);
3830 atomic_add_long(&sp_p_fail_prot, 1);
3831 goto error;
3832 }
3833 if ((first->pvo_pte.pa & LPTE_WIMG) !=
3834 (pvo->pvo_pte.pa & LPTE_WIMG)) {
3835 CTR5(KTR_PMAP, "%s: WIMG bits don't match: "
3836 "pmap=%p, va=%#jx, pvo_wimg=%#jx, exp_wimg=%#jx",
3837 __func__, pmap, (uintmax_t)va,
3838 (uintmax_t)(pvo->pvo_pte.pa & LPTE_WIMG),
3839 (uintmax_t)(first->pvo_pte.pa & LPTE_WIMG));
3840 atomic_add_long(&sp_p_fail_wimg, 1);
3841 goto error;
3842 }
3843
3844 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo);
3845 }
3846
3847 /* All OK, promote. */
3848
3849 /*
3850 * Handle superpage REF/CHG bits. If REF or CHG is set in
3851 * any page, then it must be set in the superpage.
3852 *
3853 * Instead of querying each page, we take advantage of two facts:
3854 * 1- If a page is being promoted, it was referenced.
3855 * 2- If promoted pages are writable, they were modified.
3856 */
3857 sp_refchg = LPTE_REF |
3858 ((first->pvo_pte.prot & VM_PROT_WRITE) != 0 ? LPTE_CHG : 0);
3859
3860 /* Promote pages */
3861
3862 for (pvo = first, va_end = PVO_VADDR(pvo) + HPT_SP_SIZE;
3863 pvo != NULL && PVO_VADDR(pvo) < va_end;
3864 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
3865 pvo->pvo_pte.pa &= ADDR_POFF | ~HPT_SP_MASK;
3866 pvo->pvo_pte.pa |= LPTE_LP_4K_16M;
3867 pvo->pvo_vaddr |= PVO_LARGE;
3868 }
3869 moea64_pte_replace_sp(first);
3870
3871 /* Send REF/CHG bits to VM */
3872 moea64_sp_refchg_process(first, m, sp_refchg, first->pvo_pte.prot);
3873
3874 /* Use first page to cache REF/CHG bits */
3875 atomic_set_32(&m->md.mdpg_attrs, sp_refchg | MDPG_ATTR_SP);
3876
3877 PMAP_UNLOCK(pmap);
3878
3879 atomic_add_long(&sp_mappings, 1);
3880 atomic_add_long(&sp_promotions, 1);
3881 CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
3882 __func__, (uintmax_t)sva, pmap);
3883 return;
3884
3885 error:
3886 atomic_add_long(&sp_p_failures, 1);
3887 PMAP_UNLOCK(pmap);
3888 }
3889 #endif
3890
3891 static void
moea64_sp_demote_aligned(struct pvo_entry * sp)3892 moea64_sp_demote_aligned(struct pvo_entry *sp)
3893 {
3894 struct pvo_entry *pvo;
3895 vm_offset_t va, va_end;
3896 vm_paddr_t pa;
3897 vm_page_t m;
3898 pmap_t pmap __diagused;
3899 int64_t refchg;
3900
3901 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
3902
3903 pmap = sp->pvo_pmap;
3904 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
3905
3906 pvo = sp;
3907
3908 /* Demote pages */
3909
3910 va = PVO_VADDR(pvo);
3911 pa = PVO_PADDR(pvo);
3912 m = PHYS_TO_VM_PAGE(pa);
3913
3914 for (pvo = sp, va_end = va + HPT_SP_SIZE;
3915 pvo != NULL && PVO_VADDR(pvo) < va_end;
3916 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo),
3917 va += PAGE_SIZE, pa += PAGE_SIZE) {
3918 KASSERT(pvo && PVO_VADDR(pvo) == va,
3919 ("%s: missing PVO for va %#jx", __func__, (uintmax_t)va));
3920
3921 pvo->pvo_vaddr &= ~PVO_LARGE;
3922 pvo->pvo_pte.pa &= ~LPTE_RPGN;
3923 pvo->pvo_pte.pa |= pa;
3924
3925 }
3926 refchg = moea64_pte_replace_sp(sp);
3927
3928 /*
3929 * Clear SP flag
3930 *
3931 * XXX It is possible that another pmap has this page mapped as
3932 * part of a superpage, but as the SP flag is used only for
3933 * caching SP REF/CHG bits, that will be queried if not set
3934 * in cache, it should be ok to clear it here.
3935 */
3936 atomic_clear_32(&m->md.mdpg_attrs, MDPG_ATTR_SP);
3937
3938 /*
3939 * Handle superpage REF/CHG bits. A bit set in the superpage
3940 * means all pages should consider it set.
3941 */
3942 moea64_sp_refchg_process(sp, m, refchg, sp->pvo_pte.prot);
3943
3944 atomic_add_long(&sp_demotions, 1);
3945 CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
3946 __func__, (uintmax_t)PVO_VADDR(sp), pmap);
3947 }
3948
3949 static void
moea64_sp_demote(struct pvo_entry * pvo)3950 moea64_sp_demote(struct pvo_entry *pvo)
3951 {
3952 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
3953
3954 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
3955 pvo = moea64_pvo_find_va(pvo->pvo_pmap,
3956 PVO_VADDR(pvo) & ~HPT_SP_MASK);
3957 KASSERT(pvo != NULL, ("%s: missing PVO for va %#jx",
3958 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
3959 }
3960 moea64_sp_demote_aligned(pvo);
3961 }
3962
3963 static struct pvo_entry *
moea64_sp_unwire(struct pvo_entry * sp)3964 moea64_sp_unwire(struct pvo_entry *sp)
3965 {
3966 struct pvo_entry *pvo, *prev;
3967 vm_offset_t eva;
3968 pmap_t pm;
3969 int64_t ret, refchg;
3970
3971 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
3972
3973 pm = sp->pvo_pmap;
3974 PMAP_LOCK_ASSERT(pm, MA_OWNED);
3975
3976 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
3977 refchg = 0;
3978 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
3979 prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
3980 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
3981 panic("%s: pvo %p is missing PVO_WIRED",
3982 __func__, pvo);
3983 pvo->pvo_vaddr &= ~PVO_WIRED;
3984
3985 ret = moea64_pte_replace(pvo, 0 /* No invalidation */);
3986 if (ret < 0)
3987 refchg |= LPTE_CHG;
3988 else
3989 refchg |= ret;
3990
3991 pm->pm_stats.wired_count--;
3992 }
3993
3994 /* Send REF/CHG bits to VM */
3995 moea64_sp_refchg_process(sp, PHYS_TO_VM_PAGE(PVO_PADDR(sp)),
3996 refchg, sp->pvo_pte.prot);
3997
3998 return (prev);
3999 }
4000
4001 static struct pvo_entry *
moea64_sp_protect(struct pvo_entry * sp,vm_prot_t prot)4002 moea64_sp_protect(struct pvo_entry *sp, vm_prot_t prot)
4003 {
4004 struct pvo_entry *pvo, *prev;
4005 vm_offset_t eva;
4006 pmap_t pm;
4007 vm_page_t m, m_end;
4008 int64_t ret, refchg;
4009 vm_prot_t oldprot;
4010
4011 CTR3(KTR_PMAP, "%s: va=%#jx, prot=%x",
4012 __func__, (uintmax_t)PVO_VADDR(sp), prot);
4013
4014 pm = sp->pvo_pmap;
4015 PMAP_LOCK_ASSERT(pm, MA_OWNED);
4016
4017 oldprot = sp->pvo_pte.prot;
4018 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4019 KASSERT(m != NULL, ("%s: missing vm page for pa %#jx",
4020 __func__, (uintmax_t)PVO_PADDR(sp)));
4021 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4022 refchg = 0;
4023
4024 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4025 prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
4026 pvo->pvo_pte.prot = prot;
4027 /*
4028 * If the PVO is in the page table, update mapping
4029 */
4030 ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
4031 if (ret < 0)
4032 refchg |= LPTE_CHG;
4033 else
4034 refchg |= ret;
4035 }
4036
4037 /* Send REF/CHG bits to VM */
4038 moea64_sp_refchg_process(sp, m, refchg, oldprot);
4039
4040 /* Handle pages that became executable */
4041 if ((m->a.flags & PGA_EXECUTABLE) == 0 &&
4042 (sp->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
4043 if ((m->oflags & VPO_UNMANAGED) == 0)
4044 for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++)
4045 vm_page_aflag_set(m, PGA_EXECUTABLE);
4046 moea64_syncicache(pm, PVO_VADDR(sp), PVO_PADDR(sp),
4047 HPT_SP_SIZE);
4048 }
4049
4050 return (prev);
4051 }
4052
4053 static struct pvo_entry *
moea64_sp_remove(struct pvo_entry * sp,struct pvo_dlist * tofree)4054 moea64_sp_remove(struct pvo_entry *sp, struct pvo_dlist *tofree)
4055 {
4056 struct pvo_entry *pvo, *tpvo;
4057 vm_offset_t eva;
4058 pmap_t pm __diagused;
4059
4060 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
4061
4062 pm = sp->pvo_pmap;
4063 PMAP_LOCK_ASSERT(pm, MA_OWNED);
4064
4065 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4066 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
4067 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
4068
4069 /*
4070 * For locking reasons, remove this from the page table and
4071 * pmap, but save delinking from the vm_page for a second
4072 * pass
4073 */
4074 moea64_pvo_remove_from_pmap(pvo);
4075 SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
4076 }
4077
4078 /*
4079 * Clear SP bit
4080 *
4081 * XXX See comment in moea64_sp_demote_aligned() for why it's
4082 * ok to always clear the SP bit on remove/demote.
4083 */
4084 atomic_clear_32(&PHYS_TO_VM_PAGE(PVO_PADDR(sp))->md.mdpg_attrs,
4085 MDPG_ATTR_SP);
4086
4087 return (tpvo);
4088 }
4089
4090 static int64_t
moea64_sp_query_locked(struct pvo_entry * pvo,uint64_t ptebit)4091 moea64_sp_query_locked(struct pvo_entry *pvo, uint64_t ptebit)
4092 {
4093 int64_t refchg, ret;
4094 vm_offset_t eva;
4095 vm_page_t m;
4096 pmap_t pmap;
4097 struct pvo_entry *sp;
4098
4099 PV_LOCKASSERT(PVO_PADDR(pvo));
4100
4101 pmap = pvo->pvo_pmap;
4102 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4103
4104 /* Get first SP PVO */
4105 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
4106 sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
4107 KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
4108 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
4109 } else
4110 sp = pvo;
4111 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4112
4113 refchg = 0;
4114 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4115 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
4116 ret = moea64_pte_synch(pvo);
4117 if (ret > 0) {
4118 refchg |= ret & (LPTE_CHG | LPTE_REF);
4119 if ((refchg & ptebit) != 0)
4120 break;
4121 }
4122 }
4123
4124 /* Save results */
4125 if (refchg != 0) {
4126 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4127 atomic_set_32(&m->md.mdpg_attrs, refchg | MDPG_ATTR_SP);
4128 }
4129
4130 return (refchg);
4131 }
4132
4133 /*
4134 * Note: this assumes the vm_page represented by the given pvo
4135 * is at least read locked.
4136 */
4137 static int64_t
moea64_sp_query(struct pvo_entry * pvo,uint64_t ptebit)4138 moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit)
4139 {
4140 int64_t refchg;
4141 pmap_t pmap;
4142
4143 PV_LOCKASSERT(PVO_PADDR(pvo));
4144
4145 pmap = pvo->pvo_pmap;
4146 PMAP_LOCK(pmap);
4147
4148 /*
4149 * Check if SP was demoted/removed before pmap lock was acquired.
4150 */
4151 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4152 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4153 __func__, (uintmax_t)PVO_PADDR(pvo));
4154 PMAP_UNLOCK(pmap);
4155 return (-1);
4156 }
4157
4158 refchg = moea64_sp_query_locked(pvo, ptebit);
4159 PMAP_UNLOCK(pmap);
4160
4161 CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
4162 __func__, (uintmax_t)PVO_VADDR(pvo),
4163 (uintmax_t)PVO_PADDR(pvo), (uintmax_t)refchg);
4164
4165 return (refchg);
4166 }
4167
4168 static int64_t
moea64_sp_pvo_clear(struct pvo_entry * pvo,uint64_t ptebit)4169 moea64_sp_pvo_clear(struct pvo_entry *pvo, uint64_t ptebit)
4170 {
4171 int64_t refchg, ret;
4172 pmap_t pmap;
4173 struct pvo_entry *sp;
4174 vm_offset_t eva;
4175 vm_page_t m;
4176
4177 pmap = pvo->pvo_pmap;
4178 PMAP_LOCK(pmap);
4179
4180 /*
4181 * Check if SP was demoted/removed before pmap lock was acquired.
4182 */
4183 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4184 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4185 __func__, (uintmax_t)PVO_PADDR(pvo));
4186 PMAP_UNLOCK(pmap);
4187 return (-1);
4188 }
4189
4190 /* Get first SP PVO */
4191 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
4192 sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
4193 KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
4194 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
4195 } else
4196 sp = pvo;
4197 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4198
4199 refchg = 0;
4200 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4201 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
4202 ret = moea64_pte_clear(pvo, ptebit);
4203 if (ret > 0)
4204 refchg |= ret & (LPTE_CHG | LPTE_REF);
4205 }
4206
4207 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4208 atomic_clear_32(&m->md.mdpg_attrs, ptebit);
4209 PMAP_UNLOCK(pmap);
4210
4211 CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
4212 __func__, (uintmax_t)PVO_VADDR(sp),
4213 (uintmax_t)PVO_PADDR(sp), (uintmax_t)refchg);
4214
4215 return (refchg);
4216 }
4217
4218 static int64_t
moea64_sp_clear(struct pvo_entry * pvo,vm_page_t m,uint64_t ptebit)4219 moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m, uint64_t ptebit)
4220 {
4221 int64_t count, ret;
4222 pmap_t pmap;
4223
4224 count = 0;
4225 pmap = pvo->pvo_pmap;
4226
4227 /*
4228 * Since this reference bit is shared by 4096 4KB pages, it
4229 * should not be cleared every time it is tested. Apply a
4230 * simple "hash" function on the physical page number, the
4231 * virtual superpage number, and the pmap address to select
4232 * one 4KB page out of the 4096 on which testing the
4233 * reference bit will result in clearing that reference bit.
4234 * This function is designed to avoid the selection of the
4235 * same 4KB page for every 16MB page mapping.
4236 *
4237 * Always leave the reference bit of a wired mapping set, as
4238 * the current state of its reference bit won't affect page
4239 * replacement.
4240 */
4241 if (ptebit == LPTE_REF && (((VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) ^
4242 (PVO_VADDR(pvo) >> HPT_SP_SHIFT) ^ (uintptr_t)pmap) &
4243 (HPT_SP_PAGES - 1)) == 0 && (pvo->pvo_vaddr & PVO_WIRED) == 0) {
4244 if ((ret = moea64_sp_pvo_clear(pvo, ptebit)) == -1)
4245 return (-1);
4246
4247 if ((ret & ptebit) != 0)
4248 count++;
4249
4250 /*
4251 * If this page was not selected by the hash function, then assume
4252 * its REF bit was set.
4253 */
4254 } else if (ptebit == LPTE_REF) {
4255 count++;
4256
4257 /*
4258 * To clear the CHG bit of a single SP page, first it must be demoted.
4259 * But if no CHG bit is set, no bit clear and thus no SP demotion is
4260 * needed.
4261 */
4262 } else {
4263 CTR4(KTR_PMAP, "%s: ptebit=%#jx, va=%#jx, pa=%#jx",
4264 __func__, (uintmax_t)ptebit, (uintmax_t)PVO_VADDR(pvo),
4265 (uintmax_t)PVO_PADDR(pvo));
4266
4267 PMAP_LOCK(pmap);
4268
4269 /*
4270 * Make sure SP wasn't demoted/removed before pmap lock
4271 * was acquired.
4272 */
4273 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4274 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4275 __func__, (uintmax_t)PVO_PADDR(pvo));
4276 PMAP_UNLOCK(pmap);
4277 return (-1);
4278 }
4279
4280 ret = moea64_sp_query_locked(pvo, ptebit);
4281 if ((ret & ptebit) != 0)
4282 count++;
4283 else {
4284 PMAP_UNLOCK(pmap);
4285 return (0);
4286 }
4287
4288 moea64_sp_demote(pvo);
4289 moea64_pte_clear(pvo, ptebit);
4290
4291 /*
4292 * Write protect the mapping to a single page so that a
4293 * subsequent write access may repromote.
4294 */
4295 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
4296 moea64_pvo_protect(pmap, pvo,
4297 pvo->pvo_pte.prot & ~VM_PROT_WRITE);
4298
4299 PMAP_UNLOCK(pmap);
4300 }
4301
4302 return (count);
4303 }
4304