1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008-2015 Nathan Whitehorn
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 /*
31 * Manages physical address maps.
32 *
33 * Since the information managed by this module is also stored by the
34 * logical address mapping module, this module may throw away valid virtual
35 * to physical mappings at almost any time. However, invalidations of
36 * mappings must be done as requested.
37 *
38 * In order to cope with hardware architectures which make virtual to
39 * physical map invalidates expensive, this module may delay invalidate or
40 * reduced protection operations until such time as they are actually
41 * necessary. This module is given full information as to which processors
42 * are currently using which maps, and to when physical maps must be made
43 * correct.
44 */
45
46 #include "opt_kstack_pages.h"
47
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/conf.h>
51 #include <sys/queue.h>
52 #include <sys/cpuset.h>
53 #include <sys/kerneldump.h>
54 #include <sys/ktr.h>
55 #include <sys/lock.h>
56 #include <sys/msgbuf.h>
57 #include <sys/malloc.h>
58 #include <sys/mman.h>
59 #include <sys/mutex.h>
60 #include <sys/proc.h>
61 #include <sys/rwlock.h>
62 #include <sys/sched.h>
63 #include <sys/sysctl.h>
64 #include <sys/systm.h>
65 #include <sys/vmmeter.h>
66 #include <sys/smp.h>
67 #include <sys/reboot.h>
68
69 #include <sys/kdb.h>
70
71 #include <dev/ofw/openfirm.h>
72
73 #include <vm/vm.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_param.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_page.h>
78 #include <vm/vm_phys.h>
79 #include <vm/vm_map.h>
80 #include <vm/vm_object.h>
81 #include <vm/vm_extern.h>
82 #include <vm/vm_pageout.h>
83 #include <vm/vm_dumpset.h>
84 #include <vm/vm_radix.h>
85 #include <vm/vm_reserv.h>
86 #include <vm/uma.h>
87
88 #include <machine/_inttypes.h>
89 #include <machine/cpu.h>
90 #include <machine/ifunc.h>
91 #include <machine/platform.h>
92 #include <machine/frame.h>
93 #include <machine/md_var.h>
94 #include <machine/psl.h>
95 #include <machine/bat.h>
96 #include <machine/hid.h>
97 #include <machine/pte.h>
98 #include <machine/sr.h>
99 #include <machine/trap.h>
100 #include <machine/mmuvar.h>
101
102 #include "mmu_oea64.h"
103
104 void moea64_release_vsid(uint64_t vsid);
105 uintptr_t moea64_get_unique_vsid(void);
106
107 #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR)
108 #define ENABLE_TRANS(msr) mtmsr(msr)
109
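/*
 * VSID_MAKE() packs a 20-bit hash value into bits 4..23 of a VSID;
 * VSID_TO_HASH() recovers it.  VSID_HASH_MASK selects the bits of a
 * VSID that participate in PTEG hashing (see init_pvo_entry()).
 */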
110 #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4))
111 #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff)
112 #define VSID_HASH_MASK 0x0000007fffffffffULL
113
114 /*
115 * Locking semantics:
116 *
117 * There are two locks of interest: the page locks and the pmap locks, which
118 * protect their individual PVO lists and are locked in that order. The contents
119 * of all PVO entries are protected by the locks of their respective pmaps.
120 * The pmap of any PVO is guaranteed not to change so long as the PVO is linked
121 * into any list.
122 *
123 */
124
125 #define PV_LOCK_COUNT PA_LOCK_COUNT
126 static struct mtx_padalign pv_lock[PV_LOCK_COUNT];
127
128 /*
129 * Cheap NUMA-izing of the pv locks, to reduce contention across domains.
130 * NUMA domains on POWER9 appear to be indexed as sparse memory spaces, with the
131 * index at (N << 45).
132 */
133 #ifdef __powerpc64__
134 #define PV_LOCK_IDX(pa) ((pa_index(pa) * (((pa) >> 45) + 1)) % PV_LOCK_COUNT)
135 #else
136 #define PV_LOCK_IDX(pa) (pa_index(pa) % PV_LOCK_COUNT)
137 #endif
138 #define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[PV_LOCK_IDX(pa)]))
139 #define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa))
140 #define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa))
141 #define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED)
142 #define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m))
143 #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m))
144 #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m))
145
146 /* Superpage PV lock */
147
148 #define PV_LOCK_SIZE (1<<PDRSHIFT)
149
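/*
 * Lock, in ascending physical-address order, every PV lock backing the
 * superpage that contains 'pa'.  MTX_DUPOK is needed because several of
 * the PV_LOCK_SIZE-sized chunks may hash to the same pv_lock entry.
 * moea64_sp_pv_unlock() releases the same set in the reverse order.
 */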
150 static __always_inline void
151 moea64_sp_pv_lock(vm_paddr_t pa)
152 {
153 vm_paddr_t pa_end;
154
155 /* Note: breaking when pa_end is reached to avoid overflows */
156 pa_end = pa + (HPT_SP_SIZE - PV_LOCK_SIZE);
157 for (;;) {
158 mtx_lock_flags(PV_LOCKPTR(pa), MTX_DUPOK);
159 if (pa == pa_end)
160 break;
161 pa += PV_LOCK_SIZE;
162 }
163 }
164
165 static __always_inline void
166 moea64_sp_pv_unlock(vm_paddr_t pa)
167 {
168 vm_paddr_t pa_end;
169
170 /* Note: breaking when pa_end is reached to avoid overflows */
171 pa_end = pa;
172 pa += HPT_SP_SIZE - PV_LOCK_SIZE;
173 for (;;) {
174 mtx_unlock_flags(PV_LOCKPTR(pa), MTX_DUPOK);
175 if (pa == pa_end)
176 break;
177 pa -= PV_LOCK_SIZE;
178 }
179 }
180
181 #define SP_PV_LOCK_ALIGNED(pa) moea64_sp_pv_lock(pa)
182 #define SP_PV_UNLOCK_ALIGNED(pa) moea64_sp_pv_unlock(pa)
183 #define SP_PV_LOCK(pa) moea64_sp_pv_lock((pa) & ~HPT_SP_MASK)
184 #define SP_PV_UNLOCK(pa) moea64_sp_pv_unlock((pa) & ~HPT_SP_MASK)
185 #define SP_PV_PAGE_LOCK(m) SP_PV_LOCK(VM_PAGE_TO_PHYS(m))
186 #define SP_PV_PAGE_UNLOCK(m) SP_PV_UNLOCK(VM_PAGE_TO_PHYS(m))
187
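/*
 * Shape of one entry of the Open Firmware "translations" property,
 * as unpacked by moea64_add_ofw_mappings().
 */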
188 struct ofw_map {
189 cell_t om_va;
190 cell_t om_len;
191 uint64_t om_pa;
192 cell_t om_mode;
193 };
194
195 extern unsigned char _etext[];
196 extern unsigned char _end[];
197
198 extern void *slbtrap, *slbtrapend;
199
200 /*
201 * Map of physical memory regions.
202 */
203 static struct mem_region *regions;
204 static struct mem_region *pregions;
205 static struct numa_mem_region *numa_pregions;
206 static u_int phys_avail_count;
207 static int regions_sz, pregions_sz, numapregions_sz;
208
209 extern void bs_remap_earlyboot(void);
210
211 /*
212 * Lock for the SLB tables.
213 */
214 struct mtx moea64_slb_mutex;
215
216 /*
217 * PTEG data.
218 */
219 u_long moea64_pteg_count;
220 u_long moea64_pteg_mask;
221
222 /*
223 * PVO data.
224 */
225
226 uma_zone_t moea64_pvo_zone; /* zone for pvo entries */
227
228 static struct pvo_entry *moea64_bpvo_pool;
229 static int moea64_bpvo_pool_index = 0;
230 static int moea64_bpvo_pool_size = 0;
231 SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD,
232 &moea64_bpvo_pool_index, 0, "");
233
234 #define BPVO_POOL_SIZE 327680 /* Sensible historical default value */
235 #define BPVO_POOL_EXPANSION_FACTOR 3
236 #define VSID_NBPW (sizeof(u_int32_t) * 8)
237 #ifdef __powerpc64__
238 #define NVSIDS (NPMAPS * 16)
239 #define VSID_HASHMASK 0xffffffffUL
240 #else
241 #define NVSIDS NPMAPS
242 #define VSID_HASHMASK 0xfffffUL
243 #endif
244 static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW];
245
246 static bool moea64_initialized = false;
247
248 #ifdef MOEA64_STATS
249 /*
250 * Statistics.
251 */
252 u_int moea64_pte_valid = 0;
253 u_int moea64_pte_overflow = 0;
254 u_int moea64_pvo_entries = 0;
255 u_int moea64_pvo_enter_calls = 0;
256 u_int moea64_pvo_remove_calls = 0;
257 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD,
258 &moea64_pte_valid, 0, "");
259 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD,
260 &moea64_pte_overflow, 0, "");
261 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD,
262 &moea64_pvo_entries, 0, "");
263 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD,
264 &moea64_pvo_enter_calls, 0, "");
265 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD,
266 &moea64_pvo_remove_calls, 0, "");
267 #endif
268
269 vm_offset_t moea64_scratchpage_va[2];
270 struct pvo_entry *moea64_scratchpage_pvo[2];
271 struct mtx moea64_scratchpage_mtx;
272
273 uint64_t moea64_large_page_mask = 0;
274 uint64_t moea64_large_page_size = 0;
275 int moea64_large_page_shift = 0;
276 bool moea64_has_lp_4k_16m = false;
277
278 /*
279 * PVO calls.
280 */
281 static int moea64_pvo_enter(struct pvo_entry *pvo,
282 struct pvo_head *pvo_head, struct pvo_entry **oldpvo);
283 static void moea64_pvo_remove_from_pmap(struct pvo_entry *pvo);
284 static void moea64_pvo_remove_from_page(struct pvo_entry *pvo);
285 static void moea64_pvo_remove_from_page_locked(
286 struct pvo_entry *pvo, vm_page_t m);
287 static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t);
288
289 /*
290 * Utility routines.
291 */
292 static bool moea64_query_bit(vm_page_t, uint64_t);
293 static u_int moea64_clear_bit(vm_page_t, uint64_t);
294 static void moea64_kremove(vm_offset_t);
295 static void moea64_syncicache(pmap_t pmap, vm_offset_t va,
296 vm_paddr_t pa, vm_size_t sz);
297 static void moea64_pmap_init_qpages(void);
298 static void moea64_remove_locked(pmap_t, vm_offset_t,
299 vm_offset_t, struct pvo_dlist *);
300
301 /*
302 * Superpages data and routines.
303 */
304
305 /*
306 * PVO flags (in vaddr) that must match for promotion to succeed.
307 * Note that protection bits are checked separately, as they reside in
308 * another field.
309 */
310 #define PVO_FLAGS_PROMOTE (PVO_WIRED | PVO_MANAGED | PVO_PTEGIDX_VALID)
311
312 #define PVO_IS_SP(pvo) (((pvo)->pvo_vaddr & PVO_LARGE) && \
313 (pvo)->pvo_pmap != kernel_pmap)
314
315 /* Get physical address from PVO. */
316 #define PVO_PADDR(pvo) moea64_pvo_paddr(pvo)
317
318 /* MD page flag indicating that the page is a superpage. */
319 #define MDPG_ATTR_SP 0x40000000
320
321 SYSCTL_DECL(_vm_pmap);
322
323 static SYSCTL_NODE(_vm_pmap, OID_AUTO, sp, CTLFLAG_RD, 0,
324 "SP page mapping counters");
325
326 static u_long sp_demotions;
327 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, demotions, CTLFLAG_RD,
328 &sp_demotions, 0, "SP page demotions");
329
330 static u_long sp_mappings;
331 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, mappings, CTLFLAG_RD,
332 &sp_mappings, 0, "SP page mappings");
333
334 static u_long sp_p_failures;
335 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_failures, CTLFLAG_RD,
336 &sp_p_failures, 0, "SP page promotion failures");
337
338 static u_long sp_p_fail_pa;
339 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_pa, CTLFLAG_RD,
340 &sp_p_fail_pa, 0, "SP page promotion failure: PAs don't match");
341
342 static u_long sp_p_fail_flags;
343 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_flags, CTLFLAG_RD,
344 &sp_p_fail_flags, 0, "SP page promotion failure: page flags don't match");
345
346 static u_long sp_p_fail_prot;
347 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_prot, CTLFLAG_RD,
348 &sp_p_fail_prot, 0,
349 "SP page promotion failure: page protections don't match");
350
351 static u_long sp_p_fail_wimg;
352 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_wimg, CTLFLAG_RD,
353 &sp_p_fail_wimg, 0, "SP page promotion failure: WIMG bits don't match");
354
355 static u_long sp_promotions;
356 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, promotions, CTLFLAG_RD,
357 &sp_promotions, 0, "SP page promotions");
358
359 static bool moea64_ps_enabled(pmap_t);
360 static void moea64_align_superpage(vm_object_t, vm_ooffset_t,
361 vm_offset_t *, vm_size_t);
362
363 static int moea64_sp_enter(pmap_t pmap, vm_offset_t va,
364 vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind);
365 static struct pvo_entry *moea64_sp_remove(struct pvo_entry *sp,
366 struct pvo_dlist *tofree);
367
368 #if VM_NRESERVLEVEL > 0
369 static void moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m);
370 #endif
371 static void moea64_sp_demote_aligned(struct pvo_entry *sp);
372 static void moea64_sp_demote(struct pvo_entry *pvo);
373
374 static struct pvo_entry *moea64_sp_unwire(struct pvo_entry *sp);
375 static struct pvo_entry *moea64_sp_protect(struct pvo_entry *sp,
376 vm_prot_t prot);
377
378 static int64_t moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit);
379 static int64_t moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m,
380 uint64_t ptebit);
381
382 static __inline bool moea64_sp_pvo_in_range(struct pvo_entry *pvo,
383 vm_offset_t sva, vm_offset_t eva);
384
385 /*
386 * Kernel MMU interface
387 */
388 void moea64_clear_modify(vm_page_t);
389 void moea64_copy_page(vm_page_t, vm_page_t);
390 void moea64_copy_page_dmap(vm_page_t, vm_page_t);
391 void moea64_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
392 vm_page_t *mb, vm_offset_t b_offset, int xfersize);
393 void moea64_copy_pages_dmap(vm_page_t *ma, vm_offset_t a_offset,
394 vm_page_t *mb, vm_offset_t b_offset, int xfersize);
395 int moea64_enter(pmap_t, vm_offset_t, vm_page_t, vm_prot_t,
396 u_int flags, int8_t psind);
397 void moea64_enter_object(pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
398 vm_prot_t);
399 void moea64_enter_quick(pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
400 vm_paddr_t moea64_extract(pmap_t, vm_offset_t);
401 vm_page_t moea64_extract_and_hold(pmap_t, vm_offset_t, vm_prot_t);
402 void moea64_init(void);
403 bool moea64_is_modified(vm_page_t);
404 bool moea64_is_prefaultable(pmap_t, vm_offset_t);
405 bool moea64_is_referenced(vm_page_t);
406 int moea64_ts_referenced(vm_page_t);
407 vm_offset_t moea64_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, int);
408 bool moea64_page_exists_quick(pmap_t, vm_page_t);
409 void moea64_page_init(vm_page_t);
410 int moea64_page_wired_mappings(vm_page_t);
411 int moea64_pinit(pmap_t);
412 void moea64_pinit0(pmap_t);
413 void moea64_protect(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
414 void moea64_qenter(vm_offset_t, vm_page_t *, int);
415 void moea64_qremove(vm_offset_t, int);
416 void moea64_release(pmap_t);
417 void moea64_remove(pmap_t, vm_offset_t, vm_offset_t);
418 void moea64_remove_pages(pmap_t);
419 void moea64_remove_all(vm_page_t);
420 void moea64_remove_write(vm_page_t);
421 void moea64_unwire(pmap_t, vm_offset_t, vm_offset_t);
422 void moea64_zero_page(vm_page_t);
423 void moea64_zero_page_dmap(vm_page_t);
424 void moea64_zero_page_area(vm_page_t, int, int);
425 void moea64_activate(struct thread *);
426 void moea64_deactivate(struct thread *);
427 void *moea64_mapdev(vm_paddr_t, vm_size_t);
428 void *moea64_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t);
429 void moea64_unmapdev(void *, vm_size_t);
430 vm_paddr_t moea64_kextract(vm_offset_t);
431 void moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma);
432 void moea64_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t ma);
433 void moea64_kenter(vm_offset_t, vm_paddr_t);
434 int moea64_dev_direct_mapped(vm_paddr_t, vm_size_t);
435 static void moea64_sync_icache(pmap_t, vm_offset_t, vm_size_t);
436 void moea64_dumpsys_map(vm_paddr_t pa, size_t sz,
437 void **va);
438 void moea64_scan_init(void);
439 vm_offset_t moea64_quick_enter_page(vm_page_t m);
440 vm_offset_t moea64_quick_enter_page_dmap(vm_page_t m);
441 void moea64_quick_remove_page(vm_offset_t addr);
442 bool moea64_page_is_mapped(vm_page_t m);
443 static int moea64_map_user_ptr(pmap_t pm,
444 volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen);
445 static int moea64_decode_kernel_ptr(vm_offset_t addr,
446 int *is_user, vm_offset_t *decoded_addr);
447 static size_t moea64_scan_pmap(struct bitset *dump_bitset);
448 static void *moea64_dump_pmap_init(unsigned blkpgs);
449 #ifdef __powerpc64__
450 static void moea64_page_array_startup(long);
451 #endif
452 static int moea64_mincore(pmap_t, vm_offset_t, vm_paddr_t *);
453
454 static struct pmap_funcs moea64_methods = {
455 .clear_modify = moea64_clear_modify,
456 .copy_page = moea64_copy_page,
457 .copy_pages = moea64_copy_pages,
458 .enter = moea64_enter,
459 .enter_object = moea64_enter_object,
460 .enter_quick = moea64_enter_quick,
461 .extract = moea64_extract,
462 .extract_and_hold = moea64_extract_and_hold,
463 .init = moea64_init,
464 .is_modified = moea64_is_modified,
465 .is_prefaultable = moea64_is_prefaultable,
466 .is_referenced = moea64_is_referenced,
467 .ts_referenced = moea64_ts_referenced,
468 .map = moea64_map,
469 .mincore = moea64_mincore,
470 .page_exists_quick = moea64_page_exists_quick,
471 .page_init = moea64_page_init,
472 .page_wired_mappings = moea64_page_wired_mappings,
473 .pinit = moea64_pinit,
474 .pinit0 = moea64_pinit0,
475 .protect = moea64_protect,
476 .qenter = moea64_qenter,
477 .qremove = moea64_qremove,
478 .release = moea64_release,
479 .remove = moea64_remove,
480 .remove_pages = moea64_remove_pages,
481 .remove_all = moea64_remove_all,
482 .remove_write = moea64_remove_write,
483 .sync_icache = moea64_sync_icache,
484 .unwire = moea64_unwire,
485 .zero_page = moea64_zero_page,
486 .zero_page_area = moea64_zero_page_area,
487 .activate = moea64_activate,
488 .deactivate = moea64_deactivate,
489 .page_set_memattr = moea64_page_set_memattr,
490 .quick_enter_page = moea64_quick_enter_page,
491 .quick_remove_page = moea64_quick_remove_page,
492 .page_is_mapped = moea64_page_is_mapped,
493 #ifdef __powerpc64__
494 .page_array_startup = moea64_page_array_startup,
495 #endif
496 .ps_enabled = moea64_ps_enabled,
497 .align_superpage = moea64_align_superpage,
498
499 /* Internal interfaces */
500 .mapdev = moea64_mapdev,
501 .mapdev_attr = moea64_mapdev_attr,
502 .unmapdev = moea64_unmapdev,
503 .kextract = moea64_kextract,
504 .kenter = moea64_kenter,
505 .kenter_attr = moea64_kenter_attr,
506 .dev_direct_mapped = moea64_dev_direct_mapped,
507 .dumpsys_pa_init = moea64_scan_init,
508 .dumpsys_scan_pmap = moea64_scan_pmap,
509 .dumpsys_dump_pmap_init = moea64_dump_pmap_init,
510 .dumpsys_map_chunk = moea64_dumpsys_map,
511 .map_user_ptr = moea64_map_user_ptr,
512 .decode_kernel_ptr = moea64_decode_kernel_ptr,
513 };
514
515 MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods);
516
517 /*
518 * Get physical address from PVO.
519 *
520 * For superpages, the lower bits are not stored in pvo_pte.pa and must be
521 * obtained from VA.
522 */
523 static __always_inline vm_paddr_t
524 moea64_pvo_paddr(struct pvo_entry *pvo)
525 {
526 vm_paddr_t pa;
527
528 pa = (pvo)->pvo_pte.pa & LPTE_RPGN;
529
530 if (PVO_IS_SP(pvo)) {
531 pa &= ~HPT_SP_MASK; /* This is needed to clear LPTE_LP bits. */
532 pa |= PVO_VADDR(pvo) & HPT_SP_MASK;
533 }
534 return (pa);
535 }
536
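/*
 * Return the PVO list head of a managed page.  The caller must hold the
 * page's PV lock.
 */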
537 static struct pvo_head *
538 vm_page_to_pvoh(vm_page_t m)
539 {
540
541 mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED);
542 return (&m->md.mdpg_pvoh);
543 }
544
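/*
 * Allocate a PVO entry.  Before the pmap layer is initialized (or when
 * 'bootstrap' is set) entries come from the static bpvo pool; afterwards
 * they come from the UMA zone and the call may fail (returns NULL).
 */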
545 static struct pvo_entry *
546 alloc_pvo_entry(int bootstrap)
547 {
548 struct pvo_entry *pvo;
549
550 if (!moea64_initialized || bootstrap) {
551 if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) {
552 panic("%s: bpvo pool exhausted, index=%d, size=%d, bytes=%zd. "
553 "Try setting machdep.moea64_bpvo_pool_size tunable",
554 __func__, moea64_bpvo_pool_index,
555 moea64_bpvo_pool_size,
556 moea64_bpvo_pool_size * sizeof(struct pvo_entry));
557 }
558 pvo = &moea64_bpvo_pool[
559 atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)];
560 bzero(pvo, sizeof(*pvo));
561 pvo->pvo_vaddr = PVO_BOOTSTRAP;
562 } else
563 pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT | M_ZERO);
564
565 return (pvo);
566 }
567
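/*
 * Fill in the pmap, VPN and primary PTEG slot of a newly allocated PVO
 * for the mapping of 'va' in 'pmap'.  The pmap lock must be held.
 */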
568 static void
569 init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va)
570 {
571 uint64_t vsid;
572 uint64_t hash;
573 int shift;
574
575 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
576
577 pvo->pvo_pmap = pmap;
578 va &= ~ADDR_POFF;
579 pvo->pvo_vaddr |= va;
580 vsid = va_to_vsid(pmap, va);
581 pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
582 | (vsid << 16);
583
584 if (pmap == kernel_pmap && (pvo->pvo_vaddr & PVO_LARGE) != 0)
585 shift = moea64_large_page_shift;
586 else
587 shift = ADDR_PIDX_SHFT;
588 hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift);
589 pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3;
590 }
591
592 static void
593 free_pvo_entry(struct pvo_entry *pvo)
594 {
595
596 if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
597 uma_zfree(moea64_pvo_zone, pvo);
598 }
599
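/*
 * Construct the architected PTE (struct lpte) described by a PVO,
 * translating the software PVO flags and protection into LPTE_* bits.
 */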
600 void
601 moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte)
602 {
603
604 lpte->pte_hi = moea64_pte_vpn_from_pvo_vpn(pvo);
605 lpte->pte_hi |= LPTE_VALID;
606
607 if (pvo->pvo_vaddr & PVO_LARGE)
608 lpte->pte_hi |= LPTE_BIG;
609 if (pvo->pvo_vaddr & PVO_WIRED)
610 lpte->pte_hi |= LPTE_WIRED;
611 if (pvo->pvo_vaddr & PVO_HID)
612 lpte->pte_hi |= LPTE_HID;
613
614 lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */
615 if (pvo->pvo_pte.prot & VM_PROT_WRITE)
616 lpte->pte_lo |= LPTE_BW;
617 else
618 lpte->pte_lo |= LPTE_BR;
619
620 if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE))
621 lpte->pte_lo |= LPTE_NOEXEC;
622 }
623
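/*
 * Compute the WIMG storage-attribute bits for a mapping.  An explicit
 * vm_memattr_t wins; otherwise addresses inside a known physical memory
 * region are mapped cacheable/coherent (LPTE_M) and everything else is
 * treated as cache-inhibited, guarded I/O space (LPTE_I | LPTE_G).
 */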
624 static __inline uint64_t
625 moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
626 {
627 uint64_t pte_lo;
628 int i;
629
630 if (ma != VM_MEMATTR_DEFAULT) {
631 switch (ma) {
632 case VM_MEMATTR_UNCACHEABLE:
633 return (LPTE_I | LPTE_G);
634 case VM_MEMATTR_CACHEABLE:
635 return (LPTE_M);
636 case VM_MEMATTR_WRITE_COMBINING:
637 case VM_MEMATTR_WRITE_BACK:
638 case VM_MEMATTR_PREFETCHABLE:
639 return (LPTE_I);
640 case VM_MEMATTR_WRITE_THROUGH:
641 return (LPTE_W | LPTE_M);
642 }
643 }
644
645 /*
646 * Assume the page is cache inhibited and access is guarded unless
647 * it's in our available memory array.
648 */
649 pte_lo = LPTE_I | LPTE_G;
650 for (i = 0; i < pregions_sz; i++) {
651 if ((pa >= pregions[i].mr_start) &&
652 (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
653 pte_lo &= ~(LPTE_I | LPTE_G);
654 pte_lo |= LPTE_M;
655 break;
656 }
657 }
658
659 return pte_lo;
660 }
661
662 /*
663 * Quick sort callout for comparing memory regions.
664 */
665 static int om_cmp(const void *a, const void *b);
666
667 static int
668 om_cmp(const void *a, const void *b)
669 {
670 const struct ofw_map *mapa;
671 const struct ofw_map *mapb;
672
673 mapa = a;
674 mapb = b;
675 if (mapa->om_pa < mapb->om_pa)
676 return (-1);
677 else if (mapa->om_pa > mapb->om_pa)
678 return (1);
679 else
680 return (0);
681 }
682
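/*
 * Re-enter the firmware's "translations" mappings into the kernel pmap,
 * skipping ranges that are already covered by the direct map or by an
 * existing kernel mapping.
 */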
683 static void
684 moea64_add_ofw_mappings(phandle_t mmu, size_t sz)
685 {
686 struct ofw_map translations[sz/(4*sizeof(cell_t))]; /* >= 4 cells per entry */
687 pcell_t acells, trans_cells[sz/sizeof(cell_t)];
688 struct pvo_entry *pvo;
689 register_t msr;
690 vm_offset_t off;
691 vm_paddr_t pa_base;
692 int i, j;
693
694 bzero(translations, sz);
695 OF_getencprop(OF_finddevice("/"), "#address-cells", &acells,
696 sizeof(acells));
697 if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1)
698 panic("moea64_bootstrap: can't get ofw translations");
699
700 CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations");
701 sz /= sizeof(cell_t);
702 for (i = 0, j = 0; i < sz; j++) {
703 translations[j].om_va = trans_cells[i++];
704 translations[j].om_len = trans_cells[i++];
705 translations[j].om_pa = trans_cells[i++];
706 if (acells == 2) {
707 translations[j].om_pa <<= 32;
708 translations[j].om_pa |= trans_cells[i++];
709 }
710 translations[j].om_mode = trans_cells[i++];
711 }
712 KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)",
713 i, sz));
714
715 sz = j;
716 qsort(translations, sz, sizeof (*translations), om_cmp);
717
718 for (i = 0; i < sz; i++) {
719 pa_base = translations[i].om_pa;
720 #ifndef __powerpc64__
721 if ((translations[i].om_pa >> 32) != 0)
722 panic("OFW translations above 32-bit boundary!");
723 #endif
724
725 if (pa_base % PAGE_SIZE)
726 panic("OFW translation not page-aligned (phys)!");
727 if (translations[i].om_va % PAGE_SIZE)
728 panic("OFW translation not page-aligned (virt)!");
729
730 CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x",
731 pa_base, translations[i].om_va, translations[i].om_len);
732
733 /* Now enter the pages for this mapping */
734
735 DISABLE_TRANS(msr);
736 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) {
737 /* If this address is direct-mapped, skip remapping */
738 if (hw_direct_map &&
739 translations[i].om_va == PHYS_TO_DMAP(pa_base) &&
740 moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT)
741 == LPTE_M)
742 continue;
743
744 PMAP_LOCK(kernel_pmap);
745 pvo = moea64_pvo_find_va(kernel_pmap,
746 translations[i].om_va + off);
747 PMAP_UNLOCK(kernel_pmap);
748 if (pvo != NULL)
749 continue;
750
751 moea64_kenter(translations[i].om_va + off,
752 pa_base + off);
753 }
754 ENABLE_TRANS(msr);
755 }
756 }
757
758 #ifdef __powerpc64__
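/*
 * Determine the large-page geometry.  On 970-class CPUs the HID4
 * large-page disable bit is cleared first; the page size then defaults
 * to 16 MB if nothing else has set it.
 */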
759 static void
760 moea64_probe_large_page(void)
761 {
762 uint16_t pvr = mfpvr() >> 16;
763
764 switch (pvr) {
765 case IBM970:
766 case IBM970FX:
767 case IBM970MP:
768 powerpc_sync(); isync();
769 mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG);
770 powerpc_sync(); isync();
771
772 /* FALLTHROUGH */
773 default:
774 if (moea64_large_page_size == 0) {
775 moea64_large_page_size = 0x1000000; /* 16 MB */
776 moea64_large_page_shift = 24;
777 }
778 }
779
780 moea64_large_page_mask = moea64_large_page_size - 1;
781 }
782
783 static void
784 moea64_bootstrap_slb_prefault(vm_offset_t va, int large)
785 {
786 struct slb *cache;
787 struct slb entry;
788 uint64_t esid, slbe;
789 uint64_t i;
790
791 cache = PCPU_GET(aim.slb);
792 esid = va >> ADDR_SR_SHFT;
793 slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
794
795 for (i = 0; i < 64; i++) {
796 if (cache[i].slbe == (slbe | i))
797 return;
798 }
799
800 entry.slbe = slbe;
801 entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT;
802 if (large)
803 entry.slbv |= SLBV_L;
804
805 slb_insert_kernel(entry.slbe, entry.slbv);
806 }
807 #endif
808
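/*
 * Enter a wired large-page kernel mapping of 'pa' at 'va' with the given
 * extra WIMG attribute bits.  Panics if the PVO cannot be entered.
 */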
809 static int
810 moea64_kenter_large(vm_offset_t va, vm_paddr_t pa, uint64_t attr, int bootstrap)
811 {
812 struct pvo_entry *pvo;
813 uint64_t pte_lo;
814 int error;
815
816 pte_lo = LPTE_M;
817 pte_lo |= attr;
818
819 pvo = alloc_pvo_entry(bootstrap);
820 pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE;
821 init_pvo_entry(pvo, kernel_pmap, va);
822
823 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE |
824 VM_PROT_EXECUTE;
825 pvo->pvo_pte.pa = pa | pte_lo;
826 error = moea64_pvo_enter(pvo, NULL, NULL);
827 if (error != 0)
828 panic("Error %d inserting large page\n", error);
829 return (0);
830 }
831
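/*
 * Build the physical direct map out of large pages (when available) and
 * make sure the kernel, the bpvo pool and the exception vectors remain
 * mapped on configurations without a usable direct map.
 */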
832 static void
833 moea64_setup_direct_map(vm_offset_t kernelstart,
834 vm_offset_t kernelend)
835 {
836 register_t msr;
837 vm_paddr_t pa, pkernelstart, pkernelend;
838 vm_offset_t size, off;
839 uint64_t pte_lo;
840 int i;
841
842 if (moea64_large_page_size == 0)
843 hw_direct_map = 0;
844
845 DISABLE_TRANS(msr);
846 if (hw_direct_map) {
847 PMAP_LOCK(kernel_pmap);
848 for (i = 0; i < pregions_sz; i++) {
849 for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
850 pregions[i].mr_size; pa += moea64_large_page_size) {
851 pte_lo = LPTE_M;
852 if (pa & moea64_large_page_mask) {
853 pa &= moea64_large_page_mask;
854 pte_lo |= LPTE_G;
855 }
856 if (pa + moea64_large_page_size >
857 pregions[i].mr_start + pregions[i].mr_size)
858 pte_lo |= LPTE_G;
859
860 moea64_kenter_large(PHYS_TO_DMAP(pa), pa, pte_lo, 1);
861 }
862 }
863 PMAP_UNLOCK(kernel_pmap);
864 }
865
866 /*
867 * Make sure the kernel and BPVO pool stay mapped on systems either
868 * without a direct map or on which the kernel is not already executing
869 * out of the direct-mapped region.
870 */
871 if (kernelstart < DMAP_BASE_ADDRESS) {
872 /*
873 * For pre-dmap execution, we need to use identity mapping
874 * because we will be operating with the mmu on but in the
875 * wrong address configuration until we __restartkernel().
876 */
877 for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
878 pa += PAGE_SIZE)
879 moea64_kenter(pa, pa);
880 } else if (!hw_direct_map) {
881 pkernelstart = kernelstart & ~DMAP_BASE_ADDRESS;
882 pkernelend = kernelend & ~DMAP_BASE_ADDRESS;
883 for (pa = pkernelstart & ~PAGE_MASK; pa < pkernelend;
884 pa += PAGE_SIZE)
885 moea64_kenter(pa | DMAP_BASE_ADDRESS, pa);
886 }
887
888 if (!hw_direct_map) {
889 size = moea64_bpvo_pool_size*sizeof(struct pvo_entry);
890 off = (vm_offset_t)(moea64_bpvo_pool);
891 for (pa = off; pa < off + size; pa += PAGE_SIZE)
892 moea64_kenter(pa, pa);
893
894 /* Map exception vectors */
895 for (pa = EXC_RSVD; pa < EXC_LAST; pa += PAGE_SIZE)
896 moea64_kenter(pa | DMAP_BASE_ADDRESS, pa);
897 }
898 ENABLE_TRANS(msr);
899
900 /*
901 * Allow user to override unmapped_buf_allowed for testing.
902 * XXXKIB Only direct map implementation was tested.
903 */
904 if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed",
905 &unmapped_buf_allowed))
906 unmapped_buf_allowed = hw_direct_map;
907 }
908
909 /* Quick sort callout for comparing physical addresses. */
910 static int
911 pa_cmp(const void *a, const void *b)
912 {
913 const vm_paddr_t *pa = a, *pb = b;
914
915 if (*pa < *pb)
916 return (-1);
917 else if (*pa > *pb)
918 return (1);
919 else
920 return (0);
921 }
922
923 void
924 moea64_early_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
925 {
926 int i, j;
927 vm_size_t physsz, hwphyssz;
928 vm_paddr_t kernelphysstart, kernelphysend;
929 int rm_pavail;
930
931 /* Level 0 reservations consist of 4096 pages (16MB superpage). */
932 vm_level_0_order = 12;
933
934 #ifndef __powerpc64__
935 /* We don't have a direct map since there is no BAT */
936 hw_direct_map = 0;
937
938 /* Make sure battable is zero, since we have no BAT */
939 for (i = 0; i < 16; i++) {
940 battable[i].batu = 0;
941 battable[i].batl = 0;
942 }
943 #else
944 /* Install trap handlers for SLBs */
945 bcopy(&slbtrap, (void *)EXC_DSE,(size_t)&slbtrapend - (size_t)&slbtrap);
946 bcopy(&slbtrap, (void *)EXC_ISE,(size_t)&slbtrapend - (size_t)&slbtrap);
947 __syncicache((void *)EXC_DSE, 0x80);
948 __syncicache((void *)EXC_ISE, 0x80);
949 #endif
950
951 kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
952 kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
953
954 /* Get physical memory regions from firmware */
955 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz);
956 CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
957
958 if (PHYS_AVAIL_ENTRIES < regions_sz)
959 panic("moea64_bootstrap: phys_avail too small");
960
961 phys_avail_count = 0;
962 physsz = 0;
963 hwphyssz = 0;
964 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
965 for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
966 CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)",
967 regions[i].mr_start, regions[i].mr_start +
968 regions[i].mr_size, regions[i].mr_size);
969 if (hwphyssz != 0 &&
970 (physsz + regions[i].mr_size) >= hwphyssz) {
971 if (physsz < hwphyssz) {
972 phys_avail[j] = regions[i].mr_start;
973 phys_avail[j + 1] = regions[i].mr_start +
974 hwphyssz - physsz;
975 physsz = hwphyssz;
976 phys_avail_count++;
977 dump_avail[j] = phys_avail[j];
978 dump_avail[j + 1] = phys_avail[j + 1];
979 }
980 break;
981 }
982 phys_avail[j] = regions[i].mr_start;
983 phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
984 phys_avail_count++;
985 physsz += regions[i].mr_size;
986 dump_avail[j] = phys_avail[j];
987 dump_avail[j + 1] = phys_avail[j + 1];
988 }
989
990 /* Check for overlap with the kernel and exception vectors */
991 rm_pavail = 0;
992 for (j = 0; j < 2*phys_avail_count; j+=2) {
993 if (phys_avail[j] < EXC_LAST)
994 phys_avail[j] += EXC_LAST;
995
996 if (phys_avail[j] >= kernelphysstart &&
997 phys_avail[j+1] <= kernelphysend) {
998 phys_avail[j] = phys_avail[j+1] = ~0;
999 rm_pavail++;
1000 continue;
1001 }
1002
1003 if (kernelphysstart >= phys_avail[j] &&
1004 kernelphysstart < phys_avail[j+1]) {
1005 if (kernelphysend < phys_avail[j+1]) {
1006 phys_avail[2*phys_avail_count] =
1007 (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
1008 phys_avail[2*phys_avail_count + 1] =
1009 phys_avail[j+1];
1010 phys_avail_count++;
1011 }
1012
1013 phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
1014 }
1015
1016 if (kernelphysend >= phys_avail[j] &&
1017 kernelphysend < phys_avail[j+1]) {
1018 if (kernelphysstart > phys_avail[j]) {
1019 phys_avail[2*phys_avail_count] = phys_avail[j];
1020 phys_avail[2*phys_avail_count + 1] =
1021 kernelphysstart & ~PAGE_MASK;
1022 phys_avail_count++;
1023 }
1024
1025 phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
1026 PAGE_SIZE;
1027 }
1028 }
1029
1030 /* Remove physical available regions marked for removal (~0) */
1031 if (rm_pavail) {
1032 qsort(phys_avail, 2*phys_avail_count, sizeof(phys_avail[0]),
1033 pa_cmp);
1034 phys_avail_count -= rm_pavail;
1035 for (i = 2*phys_avail_count;
1036 i < 2*(phys_avail_count + rm_pavail); i+=2)
1037 phys_avail[i] = phys_avail[i+1] = 0;
1038 }
1039
1040 physmem = btoc(physsz);
1041
1042 #ifdef PTEGCOUNT
1043 moea64_pteg_count = PTEGCOUNT;
1044 #else
1045 moea64_pteg_count = 0x1000;
1046
1047 while (moea64_pteg_count < physmem)
1048 moea64_pteg_count <<= 1;
1049
1050 moea64_pteg_count >>= 1;
1051 #endif /* PTEGCOUNT */
1052 }
1053
1054 void
1055 moea64_mid_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
1056 {
1057 int i;
1058
1059 /*
1060 * Set PTEG mask
1061 */
1062 moea64_pteg_mask = moea64_pteg_count - 1;
1063
1064 /*
1065 * Initialize SLB table lock and page locks
1066 */
1067 mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF);
1068 for (i = 0; i < PV_LOCK_COUNT; i++)
1069 mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF);
1070
1071 /*
1072 * Initialize the bootstrap pvo pool.
1073 */
1074 TUNABLE_INT_FETCH("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size);
1075 if (moea64_bpvo_pool_size == 0) {
1076 if (!hw_direct_map)
1077 moea64_bpvo_pool_size = ((ptoa((uintmax_t)physmem) * sizeof(struct vm_page)) /
1078 (PAGE_SIZE * PAGE_SIZE)) * BPVO_POOL_EXPANSION_FACTOR;
1079 else
1080 moea64_bpvo_pool_size = BPVO_POOL_SIZE;
1081 }
1082
1083 if (boothowto & RB_VERBOSE) {
1084 printf("mmu_oea64: bpvo pool entries = %d, bpvo pool size = %zu MB\n",
1085 moea64_bpvo_pool_size,
1086 moea64_bpvo_pool_size*sizeof(struct pvo_entry) / 1048576);
1087 }
1088
1089 moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc(
1090 moea64_bpvo_pool_size*sizeof(struct pvo_entry), PAGE_SIZE);
1091 moea64_bpvo_pool_index = 0;
1092
1093 /* Place at address usable through the direct map */
1094 if (hw_direct_map)
1095 moea64_bpvo_pool = (struct pvo_entry *)
1096 PHYS_TO_DMAP((uintptr_t)moea64_bpvo_pool);
1097
1098 /*
1099 * Make sure kernel vsid is allocated as well as VSID 0.
1100 */
1101 #ifndef __powerpc64__
1102 moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW]
1103 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
1104 moea64_vsid_bitmap[0] |= 1;
1105 #endif
1106
1107 /*
1108 * Initialize the kernel pmap (which is statically allocated).
1109 */
1110 #ifdef __powerpc64__
1111 for (i = 0; i < 64; i++) {
1112 pcpup->pc_aim.slb[i].slbv = 0;
1113 pcpup->pc_aim.slb[i].slbe = 0;
1114 }
1115 #else
1116 for (i = 0; i < 16; i++)
1117 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
1118 #endif
1119
1120 kernel_pmap->pmap_phys = kernel_pmap;
1121 CPU_FILL(&kernel_pmap->pm_active);
1122 RB_INIT(&kernel_pmap->pmap_pvo);
1123
1124 PMAP_LOCK_INIT(kernel_pmap);
1125
1126 /*
1127 * Now map in all the other buffers we allocated earlier
1128 */
1129
1130 moea64_setup_direct_map(kernelstart, kernelend);
1131 }
1132
1133 void
1134 moea64_late_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
1135 {
1136 ihandle_t mmui;
1137 phandle_t chosen;
1138 phandle_t mmu;
1139 ssize_t sz;
1140 int i;
1141 vm_offset_t pa, va;
1142 void *dpcpu;
1143
1144 /*
1145 * Set up the Open Firmware pmap and add its mappings if not in real
1146 * mode.
1147 */
1148
1149 chosen = OF_finddevice("/chosen");
1150 if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) {
1151 mmu = OF_instance_to_package(mmui);
1152 if (mmu == -1 ||
1153 (sz = OF_getproplen(mmu, "translations")) == -1)
1154 sz = 0;
1155 if (sz > 6144 /* tmpstksz - 2 KB headroom */)
1156 panic("moea64_bootstrap: too many ofw translations");
1157
1158 if (sz > 0)
1159 moea64_add_ofw_mappings(mmu, sz);
1160 }
1161
1162 /*
1163 * Calculate the last available physical address.
1164 */
1165 Maxmem = 0;
1166 for (i = 0; phys_avail[i + 1] != 0; i += 2)
1167 Maxmem = MAX(Maxmem, powerpc_btop(phys_avail[i + 1]));
1168
1169 /*
1170 * Initialize MMU.
1171 */
1172 pmap_cpu_bootstrap(0);
1173 mtmsr(mfmsr() | PSL_DR | PSL_IR);
1174 pmap_bootstrapped++;
1175
1176 /*
1177 * Set the start and end of kva.
1178 */
1179 virtual_avail = VM_MIN_KERNEL_ADDRESS;
1180 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS;
1181
1182 /*
1183 * Map the entire KVA range into the SLB. We must not fault there.
1184 */
1185 #ifdef __powerpc64__
1186 for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
1187 moea64_bootstrap_slb_prefault(va, 0);
1188 #endif
1189
1190 /*
1191 * Remap any early IO mappings (console framebuffer, etc.)
1192 */
1193 bs_remap_earlyboot();
1194
1195 /*
1196 * Figure out how far we can extend virtual_end into segment 16
1197 * without running into existing mappings. Segment 16 is guaranteed
1198 * to contain neither RAM nor devices (at least on Apple hardware),
1199 * but will generally contain some OFW mappings we should not
1200 * step on.
1201 */
1202
1203 #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */
1204 PMAP_LOCK(kernel_pmap);
1205 while (virtual_end < VM_MAX_KERNEL_ADDRESS &&
1206 moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL)
1207 virtual_end += PAGE_SIZE;
1208 PMAP_UNLOCK(kernel_pmap);
1209 #endif
1210
1211 /*
1212 * Allocate a kernel stack with a guard page for thread0 and map it
1213 * into the kernel page map.
1214 */
1215 pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE);
1216 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
1217 virtual_avail = va + kstack_pages * PAGE_SIZE;
1218 CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va);
1219 thread0.td_kstack = va;
1220 thread0.td_kstack_pages = kstack_pages;
1221 for (i = 0; i < kstack_pages; i++) {
1222 moea64_kenter(va, pa);
1223 pa += PAGE_SIZE;
1224 va += PAGE_SIZE;
1225 }
1226
1227 /*
1228 * Allocate virtual address space for the message buffer.
1229 */
1230 pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE);
1231 msgbufp = (struct msgbuf *)virtual_avail;
1232 va = virtual_avail;
1233 virtual_avail += round_page(msgbufsize);
1234 while (va < virtual_avail) {
1235 moea64_kenter(va, pa);
1236 pa += PAGE_SIZE;
1237 va += PAGE_SIZE;
1238 }
1239
1240 /*
1241 * Allocate virtual address space for the dynamic percpu area.
1242 */
1243 pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
1244 dpcpu = (void *)virtual_avail;
1245 va = virtual_avail;
1246 virtual_avail += DPCPU_SIZE;
1247 while (va < virtual_avail) {
1248 moea64_kenter(va, pa);
1249 pa += PAGE_SIZE;
1250 va += PAGE_SIZE;
1251 }
1252 dpcpu_init(dpcpu, curcpu);
1253
1254 crashdumpmap = (caddr_t)virtual_avail;
1255 virtual_avail += MAXDUMPPGS * PAGE_SIZE;
1256
1257 /*
1258 * Allocate some things for page zeroing. We put this directly
1259 * in the page table and use MOEA64_PTE_REPLACE to keep the PVO
1260 * book-keeping and other parts of the VM system from even
1261 * knowing that this hack exists.
1262 */
1263
1264 if (!hw_direct_map) {
1265 mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL,
1266 MTX_DEF);
1267 for (i = 0; i < 2; i++) {
1268 moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE;
1269 virtual_end -= PAGE_SIZE;
1270
1271 moea64_kenter(moea64_scratchpage_va[i], 0);
1272
1273 PMAP_LOCK(kernel_pmap);
1274 moea64_scratchpage_pvo[i] = moea64_pvo_find_va(
1275 kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]);
1276 PMAP_UNLOCK(kernel_pmap);
1277 }
1278 }
1279
1280 numa_mem_regions(&numa_pregions, &numapregions_sz);
1281 }
1282
1283 static void
1284 moea64_pmap_init_qpages(void)
1285 {
1286 struct pcpu *pc;
1287 int i;
1288
1289 if (hw_direct_map)
1290 return;
1291
1292 CPU_FOREACH(i) {
1293 pc = pcpu_find(i);
1294 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
1295 if (pc->pc_qmap_addr == 0)
1296 panic("pmap_init_qpages: unable to allocate KVA");
1297 PMAP_LOCK(kernel_pmap);
1298 pc->pc_aim.qmap_pvo =
1299 moea64_pvo_find_va(kernel_pmap, pc->pc_qmap_addr);
1300 PMAP_UNLOCK(kernel_pmap);
1301 mtx_init(&pc->pc_aim.qmap_lock, "qmap lock", NULL, MTX_DEF);
1302 }
1303 }
1304
1305 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL);
1306
1307 /*
1308 * Activate a user pmap. This mostly involves setting some non-CPU
1309 * state.
1310 */
1311 void
1312 moea64_activate(struct thread *td)
1313 {
1314 pmap_t pm;
1315
1316 pm = &td->td_proc->p_vmspace->vm_pmap;
1317 CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
1318
1319 #ifdef __powerpc64__
1320 PCPU_SET(aim.userslb, pm->pm_slb);
1321 __asm __volatile("slbmte %0, %1; isync" ::
1322 "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
1323 #else
1324 PCPU_SET(curpmap, pm->pmap_phys);
1325 mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid);
1326 #endif
1327 }
1328
1329 void
1330 moea64_deactivate(struct thread *td)
1331 {
1332 pmap_t pm;
1333
1334 __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR));
1335
1336 pm = &td->td_proc->p_vmspace->vm_pmap;
1337 CPU_CLR(PCPU_GET(cpuid), &pm->pm_active);
1338 #ifdef __powerpc64__
1339 PCPU_SET(aim.userslb, NULL);
1340 #else
1341 PCPU_SET(curpmap, NULL);
1342 #endif
1343 }
1344
1345 void
1346 moea64_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1347 {
1348 struct pvo_entry key, *pvo;
1349 vm_page_t m;
1350 int64_t refchg;
1351
1352 key.pvo_vaddr = sva;
1353 PMAP_LOCK(pm);
1354 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
1355 pvo != NULL && PVO_VADDR(pvo) < eva;
1356 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
1357 if (PVO_IS_SP(pvo)) {
1358 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
1359 pvo = moea64_sp_unwire(pvo);
1360 continue;
1361 } else {
1362 CTR1(KTR_PMAP, "%s: demote before unwire",
1363 __func__);
1364 moea64_sp_demote(pvo);
1365 }
1366 }
1367
1368 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
1369 panic("moea64_unwire: pvo %p is missing PVO_WIRED",
1370 pvo);
1371 pvo->pvo_vaddr &= ~PVO_WIRED;
1372 refchg = moea64_pte_replace(pvo, 0 /* No invalidation */);
1373 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
1374 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
1375 if (refchg < 0)
1376 refchg = LPTE_CHG;
1377 m = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
1378
1379 refchg |= atomic_readandclear_32(&m->md.mdpg_attrs);
1380 if (refchg & LPTE_CHG)
1381 vm_page_dirty(m);
1382 if (refchg & LPTE_REF)
1383 vm_page_aflag_set(m, PGA_REFERENCED);
1384 }
1385 pm->pm_stats.wired_count--;
1386 }
1387 PMAP_UNLOCK(pm);
1388 }
1389
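/*
 * mincore(2) backend: report whether 'addr' is resident, whether it is
 * backed by a superpage (MINCORE_PSIND(1)), and, for managed pages, its
 * referenced/modified state.
 */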
1390 static int
1391 moea64_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
1392 {
1393 struct pvo_entry *pvo;
1394 vm_paddr_t pa;
1395 vm_page_t m;
1396 int val;
1397 bool managed;
1398
1399 PMAP_LOCK(pmap);
1400
1401 pvo = moea64_pvo_find_va(pmap, addr);
1402 if (pvo != NULL) {
1403 pa = PVO_PADDR(pvo);
1404 m = PHYS_TO_VM_PAGE(pa);
1405 managed = (pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED;
1406 if (PVO_IS_SP(pvo))
1407 val = MINCORE_INCORE | MINCORE_PSIND(1);
1408 else
1409 val = MINCORE_INCORE;
1410 } else {
1411 PMAP_UNLOCK(pmap);
1412 return (0);
1413 }
1414
1415 PMAP_UNLOCK(pmap);
1416
1417 if (m == NULL)
1418 return (0);
1419
1420 if (managed) {
1421 if (moea64_is_modified(m))
1422 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
1423
1424 if (moea64_is_referenced(m))
1425 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
1426 }
1427
1428 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
1429 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
1430 managed) {
1431 *pap = pa;
1432 }
1433
1434 return (val);
1435 }
1436
1437 /*
1438 * This goes through and sets the physical address of our
1439 * special scratch PTE to the PA we want to zero or copy. Because
1440 * of locking issues (this can get called in pvo_enter() by
1441 * the UMA allocator), we can't use most other utility functions here.
1442 */
1443
1444 static __inline
1445 void moea64_set_scratchpage_pa(int which, vm_paddr_t pa)
1446 {
1447 struct pvo_entry *pvo;
1448
1449 KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!"));
1450 mtx_assert(&moea64_scratchpage_mtx, MA_OWNED);
1451
1452 pvo = moea64_scratchpage_pvo[which];
1453 PMAP_LOCK(pvo->pvo_pmap);
1454 pvo->pvo_pte.pa =
1455 moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa;
1456 moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
1457 PMAP_UNLOCK(pvo->pvo_pmap);
1458 isync();
1459 }
1460
1461 void
1462 moea64_copy_page(vm_page_t msrc, vm_page_t mdst)
1463 {
1464 mtx_lock(&moea64_scratchpage_mtx);
1465
1466 moea64_set_scratchpage_pa(0, VM_PAGE_TO_PHYS(msrc));
1467 moea64_set_scratchpage_pa(1, VM_PAGE_TO_PHYS(mdst));
1468
1469 bcopy((void *)moea64_scratchpage_va[0],
1470 (void *)moea64_scratchpage_va[1], PAGE_SIZE);
1471
1472 mtx_unlock(&moea64_scratchpage_mtx);
1473 }
1474
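/*
 * Direct-map variant of moea64_copy_page(), intended for configurations
 * with a direct map, where page contents can be copied through the DMAP
 * window without scratch mappings.
 */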
1475 void
1476 moea64_copy_page_dmap(vm_page_t msrc, vm_page_t mdst)
1477 {
1478 vm_offset_t dst;
1479 vm_offset_t src;
1480
1481 dst = VM_PAGE_TO_PHYS(mdst);
1482 src = VM_PAGE_TO_PHYS(msrc);
1483
1484 bcopy((void *)PHYS_TO_DMAP(src), (void *)PHYS_TO_DMAP(dst),
1485 PAGE_SIZE);
1486 }
1487
1488 inline void
1489 moea64_copy_pages_dmap(vm_page_t *ma, vm_offset_t a_offset,
1490 vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1491 {
1492 void *a_cp, *b_cp;
1493 vm_offset_t a_pg_offset, b_pg_offset;
1494 int cnt;
1495
1496 while (xfersize > 0) {
1497 a_pg_offset = a_offset & PAGE_MASK;
1498 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1499 a_cp = (char *)(uintptr_t)PHYS_TO_DMAP(
1500 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])) +
1501 a_pg_offset;
1502 b_pg_offset = b_offset & PAGE_MASK;
1503 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1504 b_cp = (char *)(uintptr_t)PHYS_TO_DMAP(
1505 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])) +
1506 b_pg_offset;
1507 bcopy(a_cp, b_cp, cnt);
1508 a_offset += cnt;
1509 b_offset += cnt;
1510 xfersize -= cnt;
1511 }
1512 }
1513
1514 void
1515 moea64_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
1516 vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1517 {
1518 void *a_cp, *b_cp;
1519 vm_offset_t a_pg_offset, b_pg_offset;
1520 int cnt;
1521
1522 mtx_lock(&moea64_scratchpage_mtx);
1523 while (xfersize > 0) {
1524 a_pg_offset = a_offset & PAGE_MASK;
1525 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1526 moea64_set_scratchpage_pa(0,
1527 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
1528 a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset;
1529 b_pg_offset = b_offset & PAGE_MASK;
1530 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1531 moea64_set_scratchpage_pa(1,
1532 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
1533 b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset;
1534 bcopy(a_cp, b_cp, cnt);
1535 a_offset += cnt;
1536 b_offset += cnt;
1537 xfersize -= cnt;
1538 }
1539 mtx_unlock(&moea64_scratchpage_mtx);
1540 }
1541
1542 void
1543 moea64_zero_page_area(vm_page_t m, int off, int size)
1544 {
1545 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1546
1547 if (size + off > PAGE_SIZE)
1548 panic("moea64_zero_page: size + off > PAGE_SIZE");
1549
1550 if (hw_direct_map) {
1551 bzero((caddr_t)(uintptr_t)PHYS_TO_DMAP(pa) + off, size);
1552 } else {
1553 mtx_lock(&moea64_scratchpage_mtx);
1554 moea64_set_scratchpage_pa(0, pa);
1555 bzero((caddr_t)moea64_scratchpage_va[0] + off, size);
1556 mtx_unlock(&moea64_scratchpage_mtx);
1557 }
1558 }
1559
1560 /*
1561 * Zero a page of physical memory by temporarily mapping it
1562 */
1563 void
1564 moea64_zero_page(vm_page_t m)
1565 {
1566 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1567 vm_offset_t va, off;
1568
1569 mtx_lock(&moea64_scratchpage_mtx);
1570
1571 moea64_set_scratchpage_pa(0, pa);
1572 va = moea64_scratchpage_va[0];
1573
1574 for (off = 0; off < PAGE_SIZE; off += cacheline_size)
1575 __asm __volatile("dcbz 0,%0" :: "r"(va + off));
1576
1577 mtx_unlock(&moea64_scratchpage_mtx);
1578 }
1579
1580 void
1581 moea64_zero_page_dmap(vm_page_t m)
1582 {
1583 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1584 vm_offset_t va, off;
1585
1586 va = PHYS_TO_DMAP(pa);
1587 for (off = 0; off < PAGE_SIZE; off += cacheline_size)
1588 __asm __volatile("dcbz 0,%0" :: "r"(va + off));
1589 }
1590
1591 vm_offset_t
1592 moea64_quick_enter_page(vm_page_t m)
1593 {
1594 struct pvo_entry *pvo;
1595 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1596
1597 /*
1598 * MOEA64_PTE_REPLACE does some locking, so we can't just grab
1599 * a critical section and access the PCPU data like on i386.
1600 * Instead, pin the thread and grab the PCPU lock to prevent
1601 * a preempting thread from using the same PCPU data.
1602 */
1603 sched_pin();
1604
1605 mtx_assert(PCPU_PTR(aim.qmap_lock), MA_NOTOWNED);
1606 pvo = PCPU_GET(aim.qmap_pvo);
1607
1608 mtx_lock(PCPU_PTR(aim.qmap_lock));
1609 pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) |
1610 (uint64_t)pa;
1611 moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
1612 isync();
1613
1614 return (PCPU_GET(qmap_addr));
1615 }
1616
1617 vm_offset_t
1618 moea64_quick_enter_page_dmap(vm_page_t m)
1619 {
1620
1621 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
1622 }
1623
1624 void
1625 moea64_quick_remove_page(vm_offset_t addr)
1626 {
1627
1628 mtx_assert(PCPU_PTR(aim.qmap_lock), MA_OWNED);
1629 KASSERT(PCPU_GET(qmap_addr) == addr,
1630 ("moea64_quick_remove_page: invalid address"));
1631 mtx_unlock(PCPU_PTR(aim.qmap_lock));
1632 sched_unpin();
1633 }
1634
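/* A page is considered mapped iff its PVO list is non-empty. */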
1635 bool
1636 moea64_page_is_mapped(vm_page_t m)
1637 {
1638 return (!LIST_EMPTY(&(m)->md.mdpg_pvoh));
1639 }
1640
1641 /*
1642 * Map the given physical page at the specified virtual address in the
1643 * target pmap with the protection requested. If specified, the page
1644 * will be wired down.
1645 */
1646
1647 int
1648 moea64_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
1649 vm_prot_t prot, u_int flags, int8_t psind)
1650 {
1651 struct pvo_entry *pvo, *oldpvo, *tpvo;
1652 struct pvo_head *pvo_head;
1653 uint64_t pte_lo;
1654 int error;
1655 vm_paddr_t pa;
1656
1657 if ((m->oflags & VPO_UNMANAGED) == 0) {
1658 if ((flags & PMAP_ENTER_QUICK_LOCKED) == 0)
1659 VM_PAGE_OBJECT_BUSY_ASSERT(m);
1660 else
1661 VM_OBJECT_ASSERT_LOCKED(m->object);
1662 }
1663
1664 if (psind > 0)
1665 return (moea64_sp_enter(pmap, va, m, prot, flags, psind));
1666
1667 pvo = alloc_pvo_entry(0);
1668 if (pvo == NULL)
1669 return (KERN_RESOURCE_SHORTAGE);
1670 pvo->pvo_pmap = NULL; /* to be filled in later */
1671 pvo->pvo_pte.prot = prot;
1672
1673 pa = VM_PAGE_TO_PHYS(m);
1674 pte_lo = moea64_calc_wimg(pa, pmap_page_get_memattr(m));
1675 pvo->pvo_pte.pa = pa | pte_lo;
1676
1677 if ((flags & PMAP_ENTER_WIRED) != 0)
1678 pvo->pvo_vaddr |= PVO_WIRED;
1679
1680 if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) {
1681 pvo_head = NULL;
1682 } else {
1683 pvo_head = &m->md.mdpg_pvoh;
1684 pvo->pvo_vaddr |= PVO_MANAGED;
1685 }
1686
1687 PV_LOCK(pa);
1688 PMAP_LOCK(pmap);
1689 if (pvo->pvo_pmap == NULL)
1690 init_pvo_entry(pvo, pmap, va);
1691
1692 if (moea64_ps_enabled(pmap) &&
1693 (tpvo = moea64_pvo_find_va(pmap, va & ~HPT_SP_MASK)) != NULL &&
1694 PVO_IS_SP(tpvo)) {
1695 /* Demote SP before entering a regular page */
1696 CTR2(KTR_PMAP, "%s: demote before enter: va=%#jx",
1697 __func__, (uintmax_t)va);
1698 moea64_sp_demote_aligned(tpvo);
1699 }
1700
1701 if (prot & VM_PROT_WRITE)
1702 if (pmap_bootstrapped &&
1703 (m->oflags & VPO_UNMANAGED) == 0)
1704 vm_page_aflag_set(m, PGA_WRITEABLE);
1705
1706 error = moea64_pvo_enter(pvo, pvo_head, &oldpvo);
1707 if (error == EEXIST) {
1708 if (oldpvo->pvo_vaddr == pvo->pvo_vaddr &&
1709 oldpvo->pvo_pte.pa == pvo->pvo_pte.pa &&
1710 oldpvo->pvo_pte.prot == prot) {
1711 /* Identical mapping already exists */
1712 error = 0;
1713
1714 /* If not in page table, reinsert it */
1715 if (moea64_pte_synch(oldpvo) < 0) {
1716 STAT_MOEA64(moea64_pte_overflow--);
1717 moea64_pte_insert(oldpvo);
1718 }
1719
1720 /* Then just clean up and go home */
1721 PMAP_UNLOCK(pmap);
1722 PV_UNLOCK(pa);
1723 free_pvo_entry(pvo);
1724 pvo = NULL;
1725 goto out;
1726 } else {
1727 /* Otherwise, need to kill it first */
1728 KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old "
1729 "mapping does not match new mapping"));
1730 moea64_pvo_remove_from_pmap(oldpvo);
1731 moea64_pvo_enter(pvo, pvo_head, NULL);
1732 }
1733 }
1734 PMAP_UNLOCK(pmap);
1735 PV_UNLOCK(pa);
1736
1737 /* Free any dead pages */
1738 if (error == EEXIST) {
1739 moea64_pvo_remove_from_page(oldpvo);
1740 free_pvo_entry(oldpvo);
1741 }
1742
1743 out:
1744 /*
1745 * Flush the page from the instruction cache if this page is
1746 * mapped executable and cacheable.
1747 */
1748 if (pmap != kernel_pmap && (m->a.flags & PGA_EXECUTABLE) == 0 &&
1749 (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
1750 vm_page_aflag_set(m, PGA_EXECUTABLE);
1751 moea64_syncicache(pmap, va, pa, PAGE_SIZE);
1752 }
1753
1754 #if VM_NRESERVLEVEL > 0
1755 /*
1756 * Try to promote pages.
1757 *
1758 * If the VA of the entered page is not aligned with its PA,
1759 * don't try page promotion as it is not possible.
1760 * This reduces the number of promotion failures dramatically.
1761 *
1762 * Ignore VM_PROT_NO_PROMOTE unless PMAP_ENTER_QUICK_LOCKED.
1763 */
1764 if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL &&
1765 (pvo->pvo_vaddr & PVO_MANAGED) != 0 &&
1766 (va & HPT_SP_MASK) == (pa & HPT_SP_MASK) &&
1767 ((prot & VM_PROT_NO_PROMOTE) == 0 ||
1768 (flags & PMAP_ENTER_QUICK_LOCKED) == 0) &&
1769 (m->flags & PG_FICTITIOUS) == 0 &&
1770 vm_reserv_level_iffullpop(m) == 0)
1771 moea64_sp_promote(pmap, va, m);
1772 #endif
1773
1774 return (KERN_SUCCESS);
1775 }
1776
1777 static void
1778 moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
1779 vm_size_t sz)
1780 {
1781
1782 /*
1783 * This is much trickier than on older systems because
1784 * we can't sync the icache on physical addresses directly
1785 * without a direct map. Instead we check a couple of cases
1786 * where the memory is already mapped in and, failing that,
1787 * use the same trick we use for page zeroing to create
1788 * a temporary mapping for this physical address.
1789 */
1790
1791 if (!pmap_bootstrapped) {
1792 /*
1793 * If PMAP is not bootstrapped, we are likely to be
1794 * in real mode.
1795 */
1796 __syncicache((void *)(uintptr_t)pa, sz);
1797 } else if (pmap == kernel_pmap) {
1798 __syncicache((void *)va, sz);
1799 } else if (hw_direct_map) {
1800 __syncicache((void *)(uintptr_t)PHYS_TO_DMAP(pa), sz);
1801 } else {
1802 /* Use the scratch page to set up a temp mapping */
1803
1804 mtx_lock(&moea64_scratchpage_mtx);
1805
1806 moea64_set_scratchpage_pa(1, pa & ~ADDR_POFF);
1807 __syncicache((void *)(moea64_scratchpage_va[1] +
1808 (va & ADDR_POFF)), sz);
1809
1810 mtx_unlock(&moea64_scratchpage_mtx);
1811 }
1812 }
1813
1814 /*
1815 * Maps a sequence of resident pages belonging to the same object.
1816 * The sequence begins with the given page m_start. This page is
1817 * mapped at the given virtual address start. Each subsequent page is
1818 * mapped at a virtual address that is offset from start by the same
1819 * amount as the page is offset from m_start within the object. The
1820 * last page in the sequence is the page with the largest offset from
1821 * m_start that can be mapped at a virtual address less than the given
1822 * virtual address end. Not every virtual page between start and end
1823 * is mapped; only those for which a resident page exists with the
1824 * corresponding offset from m_start are mapped.
1825 */
1826 void
1827 moea64_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1828 vm_page_t m_start, vm_prot_t prot)
1829 {
1830 struct pctrie_iter pages;
1831 vm_page_t m;
1832 vm_offset_t va;
1833 int8_t psind;
1834
1835 VM_OBJECT_ASSERT_LOCKED(m_start->object);
1836
1837 vm_page_iter_limit_init(&pages, m_start->object,
1838 m_start->pindex + atop(end - start));
1839 m = vm_radix_iter_lookup(&pages, m_start->pindex);
1840 while (m != NULL) {
1841 va = start + ptoa(m->pindex - m_start->pindex);
1842 if ((va & HPT_SP_MASK) == 0 && va + HPT_SP_SIZE <= end &&
1843 m->psind == 1 && moea64_ps_enabled(pm))
1844 psind = 1;
1845 else
1846 psind = 0;
1847 moea64_enter(pm, va, m, prot &
1848 (VM_PROT_READ | VM_PROT_EXECUTE),
1849 PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, psind);
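		/*
		 * A superpage mapping covers HPT_SP_SIZE / PAGE_SIZE base
		 * pages, so skip the iterator past all of them at once.
		 */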
1850 if (psind == 1)
1851 m = vm_radix_iter_jump(&pages, HPT_SP_SIZE / PAGE_SIZE);
1852 else
1853 m = vm_radix_iter_step(&pages);
1854 }
1855 }
1856
1857 void
1858 moea64_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m,
1859 vm_prot_t prot)
1860 {
1861
1862 moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE |
1863 VM_PROT_NO_PROMOTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED,
1864 0);
1865 }
1866
1867 vm_paddr_t
1868 moea64_extract(pmap_t pm, vm_offset_t va)
1869 {
1870 struct pvo_entry *pvo;
1871 vm_paddr_t pa;
1872
1873 PMAP_LOCK(pm);
1874 pvo = moea64_pvo_find_va(pm, va);
1875 if (pvo == NULL)
1876 pa = 0;
1877 else
1878 pa = PVO_PADDR(pvo) | (va - PVO_VADDR(pvo));
1879 PMAP_UNLOCK(pm);
1880
1881 return (pa);
1882 }
1883
1884 /*
1885 * Atomically extract and hold the physical page with the given
1886 * pmap and virtual address pair if that mapping permits the given
1887 * protection.
1888 */
1889 vm_page_t
1890 moea64_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1891 {
1892 struct pvo_entry *pvo;
1893 vm_page_t m;
1894
1895 m = NULL;
1896 PMAP_LOCK(pmap);
1897 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1898 if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) {
1899 m = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
1900 if (!vm_page_wire_mapped(m))
1901 m = NULL;
1902 }
1903 PMAP_UNLOCK(pmap);
1904 return (m);
1905 }
1906
1907 static void *
1908 moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
1909 uint8_t *flags, int wait)
1910 {
1911 struct pvo_entry *pvo;
1912 vm_offset_t va;
1913 vm_page_t m;
1914 int needed_lock;
1915
1916 /*
1917 * This entire routine is a horrible hack to avoid bothering kmem
1918 * for new KVA addresses. Because this can get called from inside
1919 * kmem allocation routines, calling kmem for a new address here
1920 * can lead to multiply locking non-recursive mutexes.
1921 */
1922
1923 *flags = UMA_SLAB_PRIV;
1924 needed_lock = !PMAP_LOCKED(kernel_pmap);
1925
1926 m = vm_page_alloc_noobj_domain(domain, malloc2vm_flags(wait) |
1927 VM_ALLOC_WIRED);
1928 if (m == NULL)
1929 return (NULL);
1930
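	/*
	 * With no direct map available, hand the page's physical address
	 * back as its KVA and wire an explicit 1:1 kernel mapping below.
	 */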
1931 va = VM_PAGE_TO_PHYS(m);
1932
1933 pvo = alloc_pvo_entry(1 /* bootstrap */);
1934
1935 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE;
1936 pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M;
1937
1938 if (needed_lock)
1939 PMAP_LOCK(kernel_pmap);
1940
1941 init_pvo_entry(pvo, kernel_pmap, va);
1942 pvo->pvo_vaddr |= PVO_WIRED;
1943
1944 moea64_pvo_enter(pvo, NULL, NULL);
1945
1946 if (needed_lock)
1947 PMAP_UNLOCK(kernel_pmap);
1948
1949 return (void *)va;
1950 }
1951
1952 extern int elf32_nxstack;
1953
1954 void
1955 moea64_init(void)
1956 {
1957
1958 CTR0(KTR_PMAP, "moea64_init");
1959
1960 moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry),
1961 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1962 UMA_ZONE_VM | UMA_ZONE_NOFREE);
1963
1964 /*
1965 * Are large page mappings enabled?
1966 *
1967 	 * Until HPT superpages are better tested, leave them disabled by
1968 	 * default.
1969 */
1970 superpages_enabled = 0;
1971 TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
1972 if (superpages_enabled) {
1973 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
1974 ("moea64_init: can't assign to pagesizes[1]"));
1975
1976 if (moea64_large_page_size == 0) {
1977 printf("mmu_oea64: HW does not support large pages. "
1978 "Disabling superpages...\n");
1979 superpages_enabled = 0;
1980 } else if (!moea64_has_lp_4k_16m) {
1981 printf("mmu_oea64: "
1982 "HW does not support mixed 4KB/16MB page sizes. "
1983 "Disabling superpages...\n");
1984 superpages_enabled = 0;
1985 } else
1986 pagesizes[1] = HPT_SP_SIZE;
1987 }
1988
1989 if (!hw_direct_map) {
1990 uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc);
1991 }
1992
1993 #ifdef COMPAT_FREEBSD32
1994 elf32_nxstack = 1;
1995 #endif
1996
1997 moea64_initialized = true;
1998 }
1999
2000 bool
2001 moea64_is_referenced(vm_page_t m)
2002 {
2003
2004 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2005 ("moea64_is_referenced: page %p is not managed", m));
2006
2007 return (moea64_query_bit(m, LPTE_REF));
2008 }
2009
2010 bool
2011 moea64_is_modified(vm_page_t m)
2012 {
2013
2014 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2015 ("moea64_is_modified: page %p is not managed", m));
2016
2017 /*
2018 * If the page is not busied then this check is racy.
2019 */
2020 if (!pmap_page_is_write_mapped(m))
2021 return (false);
2022
2023 return (moea64_query_bit(m, LPTE_CHG));
2024 }
2025
2026 bool
2027 moea64_is_prefaultable(pmap_t pmap, vm_offset_t va)
2028 {
2029 struct pvo_entry *pvo;
2030 bool rv = true;
2031
2032 PMAP_LOCK(pmap);
2033 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
2034 if (pvo != NULL)
2035 rv = false;
2036 PMAP_UNLOCK(pmap);
2037 return (rv);
2038 }
2039
2040 void
2041 moea64_clear_modify(vm_page_t m)
2042 {
2043
2044 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2045 ("moea64_clear_modify: page %p is not managed", m));
2046 vm_page_assert_busied(m);
2047
2048 if (!pmap_page_is_write_mapped(m))
2049 return;
2050 moea64_clear_bit(m, LPTE_CHG);
2051 }
2052
2053 /*
2054 * Clear the write and modified bits in each of the given page's mappings.
2055 */
2056 void
2057 moea64_remove_write(vm_page_t m)
2058 {
2059 struct pvo_entry *pvo;
2060 int64_t refchg, ret;
2061 pmap_t pmap;
2062
2063 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2064 ("moea64_remove_write: page %p is not managed", m));
2065 vm_page_assert_busied(m);
2066
2067 if (!pmap_page_is_write_mapped(m))
2068 return;
2069
2070 powerpc_sync();
2071 PV_PAGE_LOCK(m);
2072 refchg = 0;
2073 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2074 pmap = pvo->pvo_pmap;
2075 PMAP_LOCK(pmap);
2076 if (!(pvo->pvo_vaddr & PVO_DEAD) &&
2077 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2078 if (PVO_IS_SP(pvo)) {
2079 CTR1(KTR_PMAP, "%s: demote before remwr",
2080 __func__);
2081 moea64_sp_demote(pvo);
2082 }
2083 pvo->pvo_pte.prot &= ~VM_PROT_WRITE;
2084 ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
2085 if (ret < 0)
2086 ret = LPTE_CHG;
2087 refchg |= ret;
2088 if (pvo->pvo_pmap == kernel_pmap)
2089 isync();
2090 }
2091 PMAP_UNLOCK(pmap);
2092 }
2093 if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG)
2094 vm_page_dirty(m);
2095 vm_page_aflag_clear(m, PGA_WRITEABLE);
2096 PV_PAGE_UNLOCK(m);
2097 }
2098
2099 /*
2100 * moea64_ts_referenced:
2101 *
2102 * Return a count of reference bits for a page, clearing those bits.
2103 * It is not necessary for every reference bit to be cleared, but it
2104 * is necessary that 0 only be returned when there are truly no
2105 * reference bits set.
2106 *
2107 * XXX: The exact number of bits to check and clear is a matter that
2108 * should be tested and standardized at some point in the future for
2109 * optimal aging of shared pages.
2110 */
2111 int
2112 moea64_ts_referenced(vm_page_t m)
2113 {
2114
2115 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2116 ("moea64_ts_referenced: page %p is not managed", m));
2117 return (moea64_clear_bit(m, LPTE_REF));
2118 }
2119
2120 /*
2121 * Modify the WIMG settings of all mappings for a page.
2122 */
2123 void
2124 moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2125 {
2126 struct pvo_entry *pvo;
2127 int64_t refchg;
2128 pmap_t pmap;
2129 uint64_t lo;
2130
2131 CTR3(KTR_PMAP, "%s: pa=%#jx, ma=%#x",
2132 __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma);
2133
2134 if ((m->oflags & VPO_UNMANAGED) != 0) {
2135 m->md.mdpg_cache_attrs = ma;
2136 return;
2137 }
2138
2139 lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma);
2140
2141 PV_PAGE_LOCK(m);
2142 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2143 pmap = pvo->pvo_pmap;
2144 PMAP_LOCK(pmap);
2145 if (!(pvo->pvo_vaddr & PVO_DEAD)) {
2146 if (PVO_IS_SP(pvo)) {
2147 CTR1(KTR_PMAP,
2148 "%s: demote before set_memattr", __func__);
2149 moea64_sp_demote(pvo);
2150 }
2151 pvo->pvo_pte.pa &= ~LPTE_WIMG;
2152 pvo->pvo_pte.pa |= lo;
2153 refchg = moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
2154 if (refchg < 0)
2155 refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ?
2156 LPTE_CHG : 0;
2157 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
2158 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2159 refchg |=
2160 atomic_readandclear_32(&m->md.mdpg_attrs);
2161 if (refchg & LPTE_CHG)
2162 vm_page_dirty(m);
2163 if (refchg & LPTE_REF)
2164 vm_page_aflag_set(m, PGA_REFERENCED);
2165 }
2166 if (pvo->pvo_pmap == kernel_pmap)
2167 isync();
2168 }
2169 PMAP_UNLOCK(pmap);
2170 }
2171 m->md.mdpg_cache_attrs = ma;
2172 PV_PAGE_UNLOCK(m);
2173 }
2174
2175 /*
2176 * Map a wired page into kernel virtual address space.
2177 */
2178 void
2179 moea64_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
2180 {
2181 int error;
2182 struct pvo_entry *pvo, *oldpvo;
2183
2184 do {
2185 pvo = alloc_pvo_entry(0);
2186 if (pvo == NULL)
2187 vm_wait(NULL);
2188 } while (pvo == NULL);
2189 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
2190 pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma);
2191 pvo->pvo_vaddr |= PVO_WIRED;
2192
2193 PMAP_LOCK(kernel_pmap);
2194 oldpvo = moea64_pvo_find_va(kernel_pmap, va);
2195 if (oldpvo != NULL)
2196 moea64_pvo_remove_from_pmap(oldpvo);
2197 init_pvo_entry(pvo, kernel_pmap, va);
2198 error = moea64_pvo_enter(pvo, NULL, NULL);
2199 PMAP_UNLOCK(kernel_pmap);
2200
2201 /* Free any dead pages */
2202 if (oldpvo != NULL) {
2203 moea64_pvo_remove_from_page(oldpvo);
2204 free_pvo_entry(oldpvo);
2205 }
2206
2207 if (error != 0)
2208 panic("moea64_kenter: failed to enter va %#zx pa %#jx: %d", va,
2209 (uintmax_t)pa, error);
2210 }
2211
2212 void
2213 moea64_kenter(vm_offset_t va, vm_paddr_t pa)
2214 {
2215
2216 moea64_kenter_attr(va, pa, VM_MEMATTR_DEFAULT);
2217 }
2218
2219 /*
2220 * Extract the physical page address associated with the given kernel virtual
2221 * address.
2222 */
2223 vm_paddr_t
2224 moea64_kextract(vm_offset_t va)
2225 {
2226 struct pvo_entry *pvo;
2227 vm_paddr_t pa;
2228
2229 /*
2230 * Shortcut the direct-mapped case when applicable. We never put
2231 * anything but 1:1 (or 62-bit aliased) mappings below
2232 * VM_MIN_KERNEL_ADDRESS.
2233 */
2234 if (va < VM_MIN_KERNEL_ADDRESS)
2235 return (va & ~DMAP_BASE_ADDRESS);
2236
2237 PMAP_LOCK(kernel_pmap);
2238 pvo = moea64_pvo_find_va(kernel_pmap, va);
2239 KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR,
2240 va));
2241 pa = PVO_PADDR(pvo) | (va - PVO_VADDR(pvo));
2242 PMAP_UNLOCK(kernel_pmap);
2243 return (pa);
2244 }
2245
2246 /*
2247 * Remove a wired page from kernel virtual address space.
2248 */
2249 void
2250 moea64_kremove(vm_offset_t va)
2251 {
2252 moea64_remove(kernel_pmap, va, va + PAGE_SIZE);
2253 }
2254
2255 /*
2256 * Provide a kernel pointer corresponding to a given userland pointer.
2257 * The returned pointer is valid until the next time this function is
2258 * called in this thread. This is used internally in copyin/copyout.
2259 */
2260 static int
2261 moea64_map_user_ptr(pmap_t pm, volatile const void *uaddr,
2262 void **kaddr, size_t ulen, size_t *klen)
2263 {
2264 size_t l;
2265 #ifdef __powerpc64__
2266 struct slb *slb;
2267 #endif
2268 register_t slbv;
2269
2270 *kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK);
2271 l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr);
2272 if (l > ulen)
2273 l = ulen;
2274 if (klen)
2275 *klen = l;
2276 else if (l != ulen)
2277 return (EFAULT);
2278
2279 #ifdef __powerpc64__
2280 /* Try lockless look-up first */
2281 slb = user_va_to_slb_entry(pm, (vm_offset_t)uaddr);
2282
2283 if (slb == NULL) {
2284 /* If it isn't there, we need to pre-fault the VSID */
2285 PMAP_LOCK(pm);
2286 slbv = va_to_vsid(pm, (vm_offset_t)uaddr) << SLBV_VSID_SHIFT;
2287 PMAP_UNLOCK(pm);
2288 } else {
2289 slbv = slb->slbv;
2290 }
2291
2292 /* Mark segment no-execute */
2293 slbv |= SLBV_N;
2294 #else
2295 slbv = va_to_vsid(pm, (vm_offset_t)uaddr);
2296
2297 /* Mark segment no-execute */
2298 slbv |= SR_N;
2299 #endif
2300
2301 /* If we have already set this VSID, we can just return */
2302 if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv)
2303 return (0);
2304
2305 __asm __volatile("isync");
2306 curthread->td_pcb->pcb_cpu.aim.usr_segm =
2307 (uintptr_t)uaddr >> ADDR_SR_SHFT;
2308 curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv;
2309 #ifdef __powerpc64__
2310 __asm __volatile ("slbie %0; slbmte %1, %2; isync" ::
2311 "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE));
2312 #else
2313 __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(slbv));
2314 #endif
2315
2316 return (0);
2317 }
2318
2319 /*
2320 * Figure out where a given kernel pointer (usually in a fault) points
2321 * to from the VM's perspective, potentially remapping into userland's
2322 * address space.
2323 */
2324 static int
2325 moea64_decode_kernel_ptr(vm_offset_t addr, int *is_user,
2326 vm_offset_t *decoded_addr)
2327 {
2328 vm_offset_t user_sr;
2329
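	/*
	 * Addresses in the USER_ADDR segment are copyin/copyout windows;
	 * translate them back to the user VA recorded in the PCB.
	 */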
2330 if ((addr >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) {
2331 user_sr = curthread->td_pcb->pcb_cpu.aim.usr_segm;
2332 addr &= ADDR_PIDX | ADDR_POFF;
2333 addr |= user_sr << ADDR_SR_SHFT;
2334 *decoded_addr = addr;
2335 *is_user = 1;
2336 } else {
2337 *decoded_addr = addr;
2338 *is_user = 0;
2339 }
2340
2341 return (0);
2342 }
2343
2344 /*
2345 * Map a range of physical addresses into kernel virtual address space.
2346 *
2347 * The value passed in *virt is a suggested virtual address for the mapping.
2348 * Architectures which can support a direct-mapped physical to virtual region
2349 * can return the appropriate address within that region, leaving '*virt'
2350 * unchanged. Other architectures should map the pages starting at '*virt' and
2351 * update '*virt' with the first usable address after the mapped region.
2352 */
2353 vm_offset_t
2354 moea64_map(vm_offset_t *virt, vm_paddr_t pa_start,
2355 vm_paddr_t pa_end, int prot)
2356 {
2357 vm_offset_t sva, va;
2358
2359 if (hw_direct_map) {
2360 /*
2361 * Check if every page in the region is covered by the direct
2362 		 * map.  Since the direct map covers all of physical memory,
2363 		 * moea64_calc_wimg() is used as a shortcut to test whether each
2364 		 * page is physical memory and therefore direct-mapped.
2365 */
2366 for (va = pa_start; va < pa_end; va += PAGE_SIZE)
2367 if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M)
2368 break;
2369 if (va == pa_end)
2370 return (PHYS_TO_DMAP(pa_start));
2371 }
2372 sva = *virt;
2373 va = sva;
2374 /* XXX respect prot argument */
2375 for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
2376 moea64_kenter(va, pa_start);
2377 *virt = va;
2378
2379 return (sva);
2380 }
2381
2382 /*
2383 * Returns true if the pmap's pv is one of the first
2384 * 16 pvs linked to from this page. This count may
2385 * be changed upwards or downwards in the future; it
2386 * is only necessary that true be returned for a small
2387 * subset of pmaps for proper page aging.
2388 */
2389 bool
2390 moea64_page_exists_quick(pmap_t pmap, vm_page_t m)
2391 {
2392 int loops;
2393 struct pvo_entry *pvo;
2394 bool rv;
2395
2396 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2397 ("moea64_page_exists_quick: page %p is not managed", m));
2398 loops = 0;
2399 rv = false;
2400 PV_PAGE_LOCK(m);
2401 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2402 if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) {
2403 rv = true;
2404 break;
2405 }
2406 if (++loops >= 16)
2407 break;
2408 }
2409 PV_PAGE_UNLOCK(m);
2410 return (rv);
2411 }
2412
2413 void
2414 moea64_page_init(vm_page_t m)
2415 {
2416
2417 m->md.mdpg_attrs = 0;
2418 m->md.mdpg_cache_attrs = VM_MEMATTR_DEFAULT;
2419 LIST_INIT(&m->md.mdpg_pvoh);
2420 }
2421
2422 /*
2423 * Return the number of managed mappings to the given physical page
2424 * that are wired.
2425 */
2426 int
2427 moea64_page_wired_mappings(vm_page_t m)
2428 {
2429 struct pvo_entry *pvo;
2430 int count;
2431
2432 count = 0;
2433 if ((m->oflags & VPO_UNMANAGED) != 0)
2434 return (count);
2435 PV_PAGE_LOCK(m);
2436 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
2437 if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED)
2438 count++;
2439 PV_PAGE_UNLOCK(m);
2440 return (count);
2441 }
2442
2443 static uintptr_t moea64_vsidcontext;
2444
2445 uintptr_t
2446 moea64_get_unique_vsid(void) {
2447 u_int entropy;
2448 register_t hash;
2449 uint32_t mask;
2450 int i;
2451
2452 entropy = 0;
2453 __asm __volatile("mftb %0" : "=r"(entropy));
2454
2455 mtx_lock(&moea64_slb_mutex);
2456 for (i = 0; i < NVSIDS; i += VSID_NBPW) {
2457 u_int n;
2458
2459 /*
2460 * Create a new value by multiplying by a prime and adding in
2461 * entropy from the timebase register. This is to make the
2462 * VSID more random so that the PT hash function collides
2463 		 * less often.  (Note that the prime causes gcc to do shifts
2464 * instead of a multiply.)
2465 */
2466 moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
2467 hash = moea64_vsidcontext & (NVSIDS - 1);
2468 if (hash == 0) /* 0 is special, avoid it */
2469 continue;
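		/*
		 * Each 32-bit word of moea64_vsid_bitmap tracks VSID_NBPW
		 * candidates: 'n' selects the word, 'mask' the bit within it.
		 */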
2470 n = hash >> 5;
2471 mask = 1 << (hash & (VSID_NBPW - 1));
2472 hash = (moea64_vsidcontext & VSID_HASHMASK);
2473 if (moea64_vsid_bitmap[n] & mask) { /* collision? */
2474 /* anything free in this bucket? */
2475 if (moea64_vsid_bitmap[n] == 0xffffffff) {
2476 entropy = (moea64_vsidcontext >> 20);
2477 continue;
2478 }
2479 i = ffs(~moea64_vsid_bitmap[n]) - 1;
2480 mask = 1 << i;
2481 hash &= rounddown2(VSID_HASHMASK, VSID_NBPW);
2482 hash |= i;
2483 }
2484 if (hash == VSID_VRMA) /* also special, avoid this too */
2485 continue;
2486 KASSERT(!(moea64_vsid_bitmap[n] & mask),
2487 ("Allocating in-use VSID %#zx\n", hash));
2488 moea64_vsid_bitmap[n] |= mask;
2489 mtx_unlock(&moea64_slb_mutex);
2490 return (hash);
2491 }
2492
2493 mtx_unlock(&moea64_slb_mutex);
2494 panic("%s: out of segments",__func__);
2495 }
2496
2497 #ifdef __powerpc64__
2498 int
2499 moea64_pinit(pmap_t pmap)
2500 {
2501
2502 RB_INIT(&pmap->pmap_pvo);
2503
2504 pmap->pm_slb_tree_root = slb_alloc_tree();
2505 pmap->pm_slb = slb_alloc_user_cache();
2506 pmap->pm_slb_len = 0;
2507
2508 return (1);
2509 }
2510 #else
2511 int
2512 moea64_pinit(pmap_t pmap)
2513 {
2514 int i;
2515 uint32_t hash;
2516
2517 RB_INIT(&pmap->pmap_pvo);
2518
2519 if (pmap_bootstrapped)
2520 pmap->pmap_phys = (pmap_t)moea64_kextract((vm_offset_t)pmap);
2521 else
2522 pmap->pmap_phys = pmap;
2523
2524 /*
2525 * Allocate some segment registers for this pmap.
2526 */
2527 hash = moea64_get_unique_vsid();
2528
2529 for (i = 0; i < 16; i++)
2530 pmap->pm_sr[i] = VSID_MAKE(i, hash);
2531
2532 KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0"));
2533
2534 return (1);
2535 }
2536 #endif
2537
2538 /*
2539 * Initialize the pmap associated with process 0.
2540 */
2541 void
2542 moea64_pinit0(pmap_t pm)
2543 {
2544
2545 PMAP_LOCK_INIT(pm);
2546 moea64_pinit(pm);
2547 bzero(&pm->pm_stats, sizeof(pm->pm_stats));
2548 }
2549
2550 /*
2551 * Set the physical protection on the specified range of this map as requested.
2552 */
2553 static void
2554 moea64_pvo_protect(pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot)
2555 {
2556 struct vm_page *pg;
2557 vm_prot_t oldprot;
2558 int32_t refchg;
2559
2560 PMAP_LOCK_ASSERT(pm, MA_OWNED);
2561
2562 /*
2563 * Change the protection of the page.
2564 */
2565 oldprot = pvo->pvo_pte.prot;
2566 pvo->pvo_pte.prot = prot;
2567 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2568
2569 /*
2570 * If the PVO is in the page table, update mapping
2571 */
2572 refchg = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
2573 if (refchg < 0)
2574 refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0;
2575
2576 if (pm != kernel_pmap && pg != NULL &&
2577 (pg->a.flags & PGA_EXECUTABLE) == 0 &&
2578 (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
2579 if ((pg->oflags & VPO_UNMANAGED) == 0)
2580 vm_page_aflag_set(pg, PGA_EXECUTABLE);
2581 moea64_syncicache(pm, PVO_VADDR(pvo),
2582 PVO_PADDR(pvo), PAGE_SIZE);
2583 }
2584
2585 /*
2586 * Update vm about the REF/CHG bits if the page is managed and we have
2587 * removed write access.
2588 */
2589 if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) &&
2590 (oldprot & VM_PROT_WRITE)) {
2591 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2592 if (refchg & LPTE_CHG)
2593 vm_page_dirty(pg);
2594 if (refchg & LPTE_REF)
2595 vm_page_aflag_set(pg, PGA_REFERENCED);
2596 }
2597 }
2598
2599 void
2600 moea64_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2601 vm_prot_t prot)
2602 {
2603 struct pvo_entry *pvo, key;
2604
2605 CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
2606 sva, eva, prot);
2607
2608 KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
2609 ("moea64_protect: non current pmap"));
2610
2611 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2612 moea64_remove(pm, sva, eva);
2613 return;
2614 }
2615
2616 PMAP_LOCK(pm);
2617 key.pvo_vaddr = sva;
2618 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2619 pvo != NULL && PVO_VADDR(pvo) < eva;
2620 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
2621 if (PVO_IS_SP(pvo)) {
2622 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
2623 pvo = moea64_sp_protect(pvo, prot);
2624 continue;
2625 } else {
2626 CTR1(KTR_PMAP, "%s: demote before protect",
2627 __func__);
2628 moea64_sp_demote(pvo);
2629 }
2630 }
2631 moea64_pvo_protect(pm, pvo, prot);
2632 }
2633 PMAP_UNLOCK(pm);
2634 }
2635
2636 /*
2637 * Map a list of wired pages into kernel virtual address space. This is
2638 * intended for temporary mappings which do not need page modification or
2639 * references recorded. Existing mappings in the region are overwritten.
2640 */
2641 void
2642 moea64_qenter(vm_offset_t va, vm_page_t *m, int count)
2643 {
2644 while (count-- > 0) {
2645 moea64_kenter(va, VM_PAGE_TO_PHYS(*m));
2646 va += PAGE_SIZE;
2647 m++;
2648 }
2649 }
2650
2651 /*
2652 * Remove page mappings from kernel virtual address space. Intended for
2653 * temporary mappings entered by moea64_qenter.
2654 */
2655 void
2656 moea64_qremove(vm_offset_t va, int count)
2657 {
2658 while (count-- > 0) {
2659 moea64_kremove(va);
2660 va += PAGE_SIZE;
2661 }
2662 }
2663
2664 void
2665 moea64_release_vsid(uint64_t vsid)
2666 {
2667 int idx, mask;
2668
2669 mtx_lock(&moea64_slb_mutex);
2670 idx = vsid & (NVSIDS-1);
2671 mask = 1 << (idx % VSID_NBPW);
2672 idx /= VSID_NBPW;
2673 KASSERT(moea64_vsid_bitmap[idx] & mask,
2674 ("Freeing unallocated VSID %#jx", vsid));
2675 moea64_vsid_bitmap[idx] &= ~mask;
2676 mtx_unlock(&moea64_slb_mutex);
2677 }
2678
2679 void
2680 moea64_release(pmap_t pmap)
2681 {
2682
2683 /*
2684 * Free segment registers' VSIDs
2685 */
2686 #ifdef __powerpc64__
2687 slb_free_tree(pmap);
2688 slb_free_user_cache(pmap->pm_slb);
2689 #else
2690 KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));
2691
2692 moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
2693 #endif
2694 }
2695
2696 /*
2697 * Remove all pages mapped by the specified pmap
2698 */
2699 void
2700 moea64_remove_pages(pmap_t pm)
2701 {
2702 struct pvo_entry *pvo, *tpvo;
2703 struct pvo_dlist tofree;
2704
2705 SLIST_INIT(&tofree);
2706
2707 PMAP_LOCK(pm);
2708 RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
2709 if (pvo->pvo_vaddr & PVO_WIRED)
2710 continue;
2711
2712 /*
2713 * For locking reasons, remove this from the page table and
2714 * pmap, but save delinking from the vm_page for a second
2715 * pass
2716 */
2717 moea64_pvo_remove_from_pmap(pvo);
2718 SLIST_INSERT_HEAD(&tofree, pvo, pvo_dlink);
2719 }
2720 PMAP_UNLOCK(pm);
2721
2722 while (!SLIST_EMPTY(&tofree)) {
2723 pvo = SLIST_FIRST(&tofree);
2724 SLIST_REMOVE_HEAD(&tofree, pvo_dlink);
2725 moea64_pvo_remove_from_page(pvo);
2726 free_pvo_entry(pvo);
2727 }
2728 }
2729
2730 static void
2731 moea64_remove_locked(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2732 struct pvo_dlist *tofree)
2733 {
2734 struct pvo_entry *pvo, *tpvo, key;
2735
2736 PMAP_LOCK_ASSERT(pm, MA_OWNED);
2737
2738 key.pvo_vaddr = sva;
2739 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2740 pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
2741 if (PVO_IS_SP(pvo)) {
2742 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
2743 tpvo = moea64_sp_remove(pvo, tofree);
2744 continue;
2745 } else {
2746 CTR1(KTR_PMAP, "%s: demote before remove",
2747 __func__);
2748 moea64_sp_demote(pvo);
2749 }
2750 }
2751 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
2752
2753 /*
2754 * For locking reasons, remove this from the page table and
2755 * pmap, but save delinking from the vm_page for a second
2756 * pass
2757 */
2758 moea64_pvo_remove_from_pmap(pvo);
2759 SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
2760 }
2761 }
2762
2763 /*
2764 * Remove the given range of addresses from the specified map.
2765 */
2766 void
2767 moea64_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
2768 {
2769 struct pvo_entry *pvo;
2770 struct pvo_dlist tofree;
2771
2772 /*
2773 * Perform an unsynchronized read. This is, however, safe.
2774 */
2775 if (pm->pm_stats.resident_count == 0)
2776 return;
2777
2778 SLIST_INIT(&tofree);
2779 PMAP_LOCK(pm);
2780 moea64_remove_locked(pm, sva, eva, &tofree);
2781 PMAP_UNLOCK(pm);
2782
2783 while (!SLIST_EMPTY(&tofree)) {
2784 pvo = SLIST_FIRST(&tofree);
2785 SLIST_REMOVE_HEAD(&tofree, pvo_dlink);
2786 moea64_pvo_remove_from_page(pvo);
2787 free_pvo_entry(pvo);
2788 }
2789 }
2790
2791 /*
2792  * Remove the physical page from all pmaps in which it resides.  Cached
2793  * REF/CHG bits are reflected back to the vm_page as the mappings are removed.
2794 */
2795 void
2796 moea64_remove_all(vm_page_t m)
2797 {
2798 struct pvo_entry *pvo, *next_pvo;
2799 struct pvo_head freequeue;
2800 int wasdead;
2801 pmap_t pmap;
2802
2803 LIST_INIT(&freequeue);
2804
2805 PV_PAGE_LOCK(m);
2806 LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) {
2807 pmap = pvo->pvo_pmap;
2808 PMAP_LOCK(pmap);
2809 wasdead = (pvo->pvo_vaddr & PVO_DEAD);
2810 if (!wasdead) {
2811 if (PVO_IS_SP(pvo)) {
2812 CTR1(KTR_PMAP, "%s: demote before remove_all",
2813 __func__);
2814 moea64_sp_demote(pvo);
2815 }
2816 moea64_pvo_remove_from_pmap(pvo);
2817 }
2818 moea64_pvo_remove_from_page_locked(pvo, m);
2819 if (!wasdead)
2820 LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink);
2821 PMAP_UNLOCK(pmap);
2822
2823 }
2824 KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings"));
2825 KASSERT((m->a.flags & PGA_WRITEABLE) == 0, ("Page still writable"));
2826 PV_PAGE_UNLOCK(m);
2827
2828 /* Clean up UMA allocations */
2829 LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo)
2830 free_pvo_entry(pvo);
2831 }
2832
2833 /*
2834 * Allocate a physical page of memory directly from the phys_avail map.
2835 * Can only be called from moea64_bootstrap before avail start and end are
2836 * calculated.
2837 */
2838 vm_offset_t
2839 moea64_bootstrap_alloc(vm_size_t size, vm_size_t align)
2840 {
2841 vm_offset_t s, e;
2842 int i, j;
2843
2844 size = round_page(size);
2845 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
2846 if (align != 0)
2847 s = roundup2(phys_avail[i], align);
2848 else
2849 s = phys_avail[i];
2850 e = s + size;
2851
2852 if (s < phys_avail[i] || e > phys_avail[i + 1])
2853 continue;
2854
2855 if (s + size > platform_real_maxaddr())
2856 continue;
2857
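		/*
		 * Carve the allocation out of phys_avail[]: trim the region's
		 * start, trim its end, or split it into two entries.
		 */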
2858 if (s == phys_avail[i]) {
2859 phys_avail[i] += size;
2860 } else if (e == phys_avail[i + 1]) {
2861 phys_avail[i + 1] -= size;
2862 } else {
2863 for (j = phys_avail_count * 2; j > i; j -= 2) {
2864 phys_avail[j] = phys_avail[j - 2];
2865 phys_avail[j + 1] = phys_avail[j - 1];
2866 }
2867
2868 phys_avail[i + 3] = phys_avail[i + 1];
2869 phys_avail[i + 1] = s;
2870 phys_avail[i + 2] = e;
2871 phys_avail_count++;
2872 }
2873
2874 return (s);
2875 }
2876 panic("moea64_bootstrap_alloc: could not allocate memory");
2877 }
2878
2879 static int
2880 moea64_pvo_enter(struct pvo_entry *pvo, struct pvo_head *pvo_head,
2881 struct pvo_entry **oldpvop)
2882 {
2883 struct pvo_entry *old_pvo;
2884 int err;
2885
2886 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2887
2888 STAT_MOEA64(moea64_pvo_enter_calls++);
2889
2890 /*
2891 * Add to pmap list
2892 */
2893 old_pvo = RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2894
2895 if (old_pvo != NULL) {
2896 if (oldpvop != NULL)
2897 *oldpvop = old_pvo;
2898 return (EEXIST);
2899 }
2900
2901 if (pvo_head != NULL) {
2902 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
2903 }
2904
2905 if (pvo->pvo_vaddr & PVO_WIRED)
2906 pvo->pvo_pmap->pm_stats.wired_count++;
2907 pvo->pvo_pmap->pm_stats.resident_count++;
2908
2909 /*
2910 * Insert it into the hardware page table
2911 */
2912 err = moea64_pte_insert(pvo);
2913 if (err != 0) {
2914 panic("moea64_pvo_enter: overflow");
2915 }
2916
2917 STAT_MOEA64(moea64_pvo_entries++);
2918
2919 if (pvo->pvo_pmap == kernel_pmap)
2920 isync();
2921
2922 #ifdef __powerpc64__
2923 /*
2924 * Make sure all our bootstrap mappings are in the SLB as soon
2925 * as virtual memory is switched on.
2926 */
2927 if (!pmap_bootstrapped)
2928 moea64_bootstrap_slb_prefault(PVO_VADDR(pvo),
2929 pvo->pvo_vaddr & PVO_LARGE);
2930 #endif
2931
2932 return (0);
2933 }
2934
2935 static void
2936 moea64_pvo_remove_from_pmap(struct pvo_entry *pvo)
2937 {
2938 struct vm_page *pg;
2939 int32_t refchg;
2940
2941 KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap"));
2942 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2943 KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO"));
2944
2945 /*
2946 * If there is an active pte entry, we need to deactivate it
2947 */
2948 refchg = moea64_pte_unset(pvo);
2949 if (refchg < 0) {
2950 /*
2951 * If it was evicted from the page table, be pessimistic and
2952 * dirty the page.
2953 */
2954 if (pvo->pvo_pte.prot & VM_PROT_WRITE)
2955 refchg = LPTE_CHG;
2956 else
2957 refchg = 0;
2958 }
2959
2960 /*
2961 * Update our statistics.
2962 */
2963 pvo->pvo_pmap->pm_stats.resident_count--;
2964 if (pvo->pvo_vaddr & PVO_WIRED)
2965 pvo->pvo_pmap->pm_stats.wired_count--;
2966
2967 /*
2968 * Remove this PVO from the pmap list.
2969 */
2970 RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2971
2972 /*
2973 * Mark this for the next sweep
2974 */
2975 pvo->pvo_vaddr |= PVO_DEAD;
2976
2977 /* Send RC bits to VM */
2978 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
2979 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2980 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2981 if (pg != NULL) {
2982 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2983 if (refchg & LPTE_CHG)
2984 vm_page_dirty(pg);
2985 if (refchg & LPTE_REF)
2986 vm_page_aflag_set(pg, PGA_REFERENCED);
2987 }
2988 }
2989 }
2990
2991 static inline void
2992 moea64_pvo_remove_from_page_locked(struct pvo_entry *pvo,
2993 vm_page_t m)
2994 {
2995
2996 KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page"));
2997
2998 /* Use NULL pmaps as a sentinel for races in page deletion */
2999 if (pvo->pvo_pmap == NULL)
3000 return;
3001 pvo->pvo_pmap = NULL;
3002
3003 /*
3004 * Update vm about page writeability/executability if managed
3005 */
3006 PV_LOCKASSERT(PVO_PADDR(pvo));
3007 if (pvo->pvo_vaddr & PVO_MANAGED) {
3008 if (m != NULL) {
3009 LIST_REMOVE(pvo, pvo_vlink);
3010 if (LIST_EMPTY(vm_page_to_pvoh(m)))
3011 vm_page_aflag_clear(m,
3012 PGA_WRITEABLE | PGA_EXECUTABLE);
3013 }
3014 }
3015
3016 STAT_MOEA64(moea64_pvo_entries--);
3017 STAT_MOEA64(moea64_pvo_remove_calls++);
3018 }
3019
3020 static void
3021 moea64_pvo_remove_from_page(struct pvo_entry *pvo)
3022 {
3023 vm_page_t pg = NULL;
3024
3025 if (pvo->pvo_vaddr & PVO_MANAGED)
3026 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
3027
3028 PV_LOCK(PVO_PADDR(pvo));
3029 moea64_pvo_remove_from_page_locked(pvo, pg);
3030 PV_UNLOCK(PVO_PADDR(pvo));
3031 }
3032
3033 static struct pvo_entry *
3034 moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
3035 {
3036 struct pvo_entry key;
3037
3038 PMAP_LOCK_ASSERT(pm, MA_OWNED);
3039
3040 key.pvo_vaddr = va & ~ADDR_POFF;
3041 return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key));
3042 }
3043
3044 static bool
3045 moea64_query_bit(vm_page_t m, uint64_t ptebit)
3046 {
3047 struct pvo_entry *pvo;
3048 int64_t ret;
3049 bool rv;
3050 vm_page_t sp;
3051
3052 /*
3053 * See if this bit is stored in the page already.
3054 *
3055 * For superpages, the bit is stored in the first vm page.
3056 */
3057 if ((m->md.mdpg_attrs & ptebit) != 0 ||
3058 ((sp = PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK)) != NULL &&
3059 (sp->md.mdpg_attrs & (ptebit | MDPG_ATTR_SP)) ==
3060 (ptebit | MDPG_ATTR_SP)))
3061 return (true);
3062
3063 /*
3064 * Examine each PTE. Sync so that any pending REF/CHG bits are
3065 * flushed to the PTEs.
3066 */
3067 rv = false;
3068 powerpc_sync();
3069 PV_PAGE_LOCK(m);
3070 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
3071 if (PVO_IS_SP(pvo)) {
3072 ret = moea64_sp_query(pvo, ptebit);
3073 /*
3074 * If SP was not demoted, check its REF/CHG bits here.
3075 */
3076 if (ret != -1) {
3077 if ((ret & ptebit) != 0) {
3078 rv = true;
3079 break;
3080 }
3081 continue;
3082 }
3083 /* else, fallthrough */
3084 }
3085
3086 ret = 0;
3087
3088 /*
3089 		 * See if this pvo has a valid PTE.  If so, fetch the
3090 * REF/CHG bits from the valid PTE. If the appropriate
3091 * ptebit is set, return success.
3092 */
3093 PMAP_LOCK(pvo->pvo_pmap);
3094 if (!(pvo->pvo_vaddr & PVO_DEAD))
3095 ret = moea64_pte_synch(pvo);
3096 PMAP_UNLOCK(pvo->pvo_pmap);
3097
3098 if (ret > 0) {
3099 atomic_set_32(&m->md.mdpg_attrs,
3100 ret & (LPTE_CHG | LPTE_REF));
3101 if (ret & ptebit) {
3102 rv = true;
3103 break;
3104 }
3105 }
3106 }
3107 PV_PAGE_UNLOCK(m);
3108
3109 return (rv);
3110 }
3111
3112 static u_int
3113 moea64_clear_bit(vm_page_t m, u_int64_t ptebit)
3114 {
3115 u_int count;
3116 struct pvo_entry *pvo;
3117 int64_t ret;
3118
3119 /*
3120 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
3121 * we can reset the right ones).
3122 */
3123 powerpc_sync();
3124
3125 /*
3126 * For each pvo entry, clear the pte's ptebit.
3127 */
3128 count = 0;
3129 PV_PAGE_LOCK(m);
3130 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
3131 if (PVO_IS_SP(pvo)) {
3132 if ((ret = moea64_sp_clear(pvo, m, ptebit)) != -1) {
3133 count += ret;
3134 continue;
3135 }
3136 }
3137 ret = 0;
3138
3139 PMAP_LOCK(pvo->pvo_pmap);
3140 if (!(pvo->pvo_vaddr & PVO_DEAD))
3141 ret = moea64_pte_clear(pvo, ptebit);
3142 PMAP_UNLOCK(pvo->pvo_pmap);
3143
3144 if (ret > 0 && (ret & ptebit))
3145 count++;
3146 }
3147 atomic_clear_32(&m->md.mdpg_attrs, ptebit);
3148 PV_PAGE_UNLOCK(m);
3149
3150 return (count);
3151 }
3152
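/*
 * Check whether [pa, pa + size) is reachable through the kernel's 1:1
 * (direct) mappings.  Returns 0 if so and EFAULT otherwise.
 */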
3153 int
3154 moea64_dev_direct_mapped(vm_paddr_t pa, vm_size_t size)
3155 {
3156 struct pvo_entry *pvo, key;
3157 vm_offset_t ppa;
3158 int error = 0;
3159
3160 if (hw_direct_map && mem_valid(pa, size) == 0)
3161 return (0);
3162
3163 PMAP_LOCK(kernel_pmap);
3164 ppa = pa & ~ADDR_POFF;
3165 key.pvo_vaddr = DMAP_BASE_ADDRESS + ppa;
3166 for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key);
3167 ppa < pa + size; ppa += PAGE_SIZE,
3168 pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) {
3169 if (pvo == NULL || PVO_PADDR(pvo) != ppa) {
3170 error = EFAULT;
3171 break;
3172 }
3173 }
3174 PMAP_UNLOCK(kernel_pmap);
3175
3176 return (error);
3177 }
3178
3179 /*
3180 * Map a set of physical memory pages into the kernel virtual
3181 * address space. Return a pointer to where it is mapped. This
3182 * routine is intended to be used for mapping device memory,
3183 * NOT real memory.
3184 */
3185 void *
3186 moea64_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
3187 {
3188 vm_offset_t va, tmpva, ppa, offset;
3189
3190 ppa = trunc_page(pa);
3191 offset = pa & PAGE_MASK;
3192 size = roundup2(offset + size, PAGE_SIZE);
3193
3194 va = kva_alloc(size);
3195
3196 if (!va)
3197 panic("moea64_mapdev: Couldn't alloc kernel virtual memory");
3198
3199 for (tmpva = va; size > 0;) {
3200 moea64_kenter_attr(tmpva, ppa, ma);
3201 size -= PAGE_SIZE;
3202 tmpva += PAGE_SIZE;
3203 ppa += PAGE_SIZE;
3204 }
3205
3206 return ((void *)(va + offset));
3207 }
3208
3209 void *
3210 moea64_mapdev(vm_paddr_t pa, vm_size_t size)
3211 {
3212
3213 return moea64_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT);
3214 }
3215
3216 void
3217 moea64_unmapdev(void *p, vm_size_t size)
3218 {
3219 vm_offset_t base, offset, va;
3220
3221 va = (vm_offset_t)p;
3222 base = trunc_page(va);
3223 offset = va & PAGE_MASK;
3224 size = roundup2(offset + size, PAGE_SIZE);
3225
3226 moea64_qremove(base, atop(size));
3227 kva_free(base, size);
3228 }
3229
3230 void
3231 moea64_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3232 {
3233 struct pvo_entry *pvo;
3234 vm_offset_t lim;
3235 vm_paddr_t pa;
3236 vm_size_t len;
3237
3238 if (__predict_false(pm == NULL))
3239 pm = &curthread->td_proc->p_vmspace->vm_pmap;
3240
3241 PMAP_LOCK(pm);
3242 while (sz > 0) {
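		/* Work at most one page per iteration so a single PVO covers it. */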
3243 lim = round_page(va+1);
3244 len = MIN(lim - va, sz);
3245 pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
3246 if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) {
3247 pa = PVO_PADDR(pvo) | (va & ADDR_POFF);
3248 moea64_syncicache(pm, va, pa, len);
3249 }
3250 va += len;
3251 sz -= len;
3252 }
3253 PMAP_UNLOCK(pm);
3254 }
3255
3256 void
3257 moea64_dumpsys_map(vm_paddr_t pa, size_t sz, void **va)
3258 {
3259
3260 *va = (void *)(uintptr_t)pa;
3261 }
3262
3263 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
3264
3265 void
3266 moea64_scan_init(void)
3267 {
3268 struct pvo_entry *pvo;
3269 vm_offset_t va;
3270 int i;
3271
3272 if (!do_minidump) {
3273 /* Initialize phys. segments for dumpsys(). */
3274 memset(&dump_map, 0, sizeof(dump_map));
3275 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz);
3276 for (i = 0; i < pregions_sz; i++) {
3277 dump_map[i].pa_start = pregions[i].mr_start;
3278 dump_map[i].pa_size = pregions[i].mr_size;
3279 }
3280 return;
3281 }
3282
3283 /* Virtual segments for minidumps: */
3284 memset(&dump_map, 0, sizeof(dump_map));
3285
3286 /* 1st: kernel .data and .bss. */
3287 dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
3288 dump_map[0].pa_size = round_page((uintptr_t)_end) -
3289 dump_map[0].pa_start;
3290
3291 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */
3292 dump_map[1].pa_start = (vm_paddr_t)(uintptr_t)msgbufp->msg_ptr;
3293 dump_map[1].pa_size = round_page(msgbufp->msg_size);
3294
3295 /* 3rd: kernel VM. */
3296 va = dump_map[1].pa_start + dump_map[1].pa_size;
3297 /* Find start of next chunk (from va). */
3298 while (va < virtual_end) {
3299 /* Don't dump the buffer cache. */
3300 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
3301 va = kmi.buffer_eva;
3302 continue;
3303 }
3304 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
3305 if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD))
3306 break;
3307 va += PAGE_SIZE;
3308 }
3309 if (va < virtual_end) {
3310 dump_map[2].pa_start = va;
3311 va += PAGE_SIZE;
3312 /* Find last page in chunk. */
3313 while (va < virtual_end) {
3314 /* Don't run into the buffer cache. */
3315 if (va == kmi.buffer_sva)
3316 break;
3317 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
3318 if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD))
3319 break;
3320 va += PAGE_SIZE;
3321 }
3322 dump_map[2].pa_size = va - dump_map[2].pa_start;
3323 }
3324 }
3325
3326 #ifdef __powerpc64__
3327
3328 static size_t
3329 moea64_scan_pmap(struct bitset *dump_bitset)
3330 {
3331 struct pvo_entry *pvo;
3332 vm_paddr_t pa, pa_end;
3333 vm_offset_t va, pgva, kstart, kend, kstart_lp, kend_lp;
3334 uint64_t lpsize;
3335
3336 lpsize = moea64_large_page_size;
3337 kstart = trunc_page((vm_offset_t)_etext);
3338 kend = round_page((vm_offset_t)_end);
3339 kstart_lp = kstart & ~moea64_large_page_mask;
3340 kend_lp = (kend + moea64_large_page_mask) & ~moea64_large_page_mask;
3341
3342 CTR4(KTR_PMAP, "moea64_scan_pmap: kstart=0x%016lx, kend=0x%016lx, "
3343 "kstart_lp=0x%016lx, kend_lp=0x%016lx",
3344 kstart, kend, kstart_lp, kend_lp);
3345
3346 PMAP_LOCK(kernel_pmap);
3347 RB_FOREACH(pvo, pvo_tree, &kernel_pmap->pmap_pvo) {
3348 va = pvo->pvo_vaddr;
3349
3350 if (va & PVO_DEAD)
3351 continue;
3352
3353 /* Skip DMAP (except kernel area) */
3354 if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) {
3355 if (va & PVO_LARGE) {
3356 pgva = va & ~moea64_large_page_mask;
3357 if (pgva < kstart_lp || pgva >= kend_lp)
3358 continue;
3359 } else {
3360 pgva = trunc_page(va);
3361 if (pgva < kstart || pgva >= kend)
3362 continue;
3363 }
3364 }
3365
3366 pa = PVO_PADDR(pvo);
3367
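		/*
		 * Record every dumpable 4KB page backing this mapping; a large
		 * mapping spans lpsize bytes.
		 */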
3368 if (va & PVO_LARGE) {
3369 pa_end = pa + lpsize;
3370 for (; pa < pa_end; pa += PAGE_SIZE) {
3371 if (vm_phys_is_dumpable(pa))
3372 vm_page_dump_add(dump_bitset, pa);
3373 }
3374 } else {
3375 if (vm_phys_is_dumpable(pa))
3376 vm_page_dump_add(dump_bitset, pa);
3377 }
3378 }
3379 PMAP_UNLOCK(kernel_pmap);
3380
3381 return (sizeof(struct lpte) * moea64_pteg_count * 8);
3382 }
3383
3384 static struct dump_context dump_ctx;
3385
3386 static void *
3387 moea64_dump_pmap_init(unsigned blkpgs)
3388 {
3389 dump_ctx.ptex = 0;
3390 dump_ctx.ptex_end = moea64_pteg_count * 8;
3391 dump_ctx.blksz = blkpgs * PAGE_SIZE;
3392 return (&dump_ctx);
3393 }
3394
3395 #else
3396
3397 static size_t
3398 moea64_scan_pmap(struct bitset *dump_bitset __unused)
3399 {
3400 return (0);
3401 }
3402
3403 static void *
3404 moea64_dump_pmap_init(unsigned blkpgs)
3405 {
3406 return (NULL);
3407 }
3408
3409 #endif
3410
3411 #ifdef __powerpc64__
3412 static void
3413 moea64_map_range(vm_offset_t va, vm_paddr_t pa, vm_size_t npages)
3414 {
3415
3416 for (; npages > 0; --npages) {
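		/*
		 * Use a large (16MB) kernel mapping whenever both addresses are
		 * large-page aligned and enough pages remain; otherwise fall
		 * back to 4KB mappings.
		 */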
3417 if (moea64_large_page_size != 0 &&
3418 (pa & moea64_large_page_mask) == 0 &&
3419 (va & moea64_large_page_mask) == 0 &&
3420 npages >= (moea64_large_page_size >> PAGE_SHIFT)) {
3421 PMAP_LOCK(kernel_pmap);
3422 moea64_kenter_large(va, pa, 0, 0);
3423 PMAP_UNLOCK(kernel_pmap);
3424 pa += moea64_large_page_size;
3425 va += moea64_large_page_size;
3426 npages -= (moea64_large_page_size >> PAGE_SHIFT) - 1;
3427 } else {
3428 moea64_kenter(va, pa);
3429 pa += PAGE_SIZE;
3430 va += PAGE_SIZE;
3431 }
3432 }
3433 }
3434
3435 static void
3436 moea64_page_array_startup(long pages)
3437 {
3438 long dom_pages[MAXMEMDOM];
3439 vm_paddr_t pa;
3440 vm_offset_t va, vm_page_base;
3441 vm_size_t needed, size;
3442 int domain;
3443 int i;
3444
3445 vm_page_base = 0xd000000000000000ULL;
3446
3447 /* Short-circuit single-domain systems. */
3448 if (vm_ndomains == 1) {
3449 size = round_page(pages * sizeof(struct vm_page));
3450 pa = vm_phys_early_alloc(0, size);
3451 vm_page_base = moea64_map(&vm_page_base,
3452 pa, pa + size, VM_PROT_READ | VM_PROT_WRITE);
3453 vm_page_array_size = pages;
3454 vm_page_array = (vm_page_t)vm_page_base;
3455 return;
3456 }
3457
3458 for (i = 0; i < MAXMEMDOM; i++)
3459 dom_pages[i] = 0;
3460
3461 /* Now get the number of pages required per domain. */
3462 for (i = 0; i < vm_phys_nsegs; i++) {
3463 domain = vm_phys_segs[i].domain;
3464 KASSERT(domain < MAXMEMDOM,
3465 ("Invalid vm_phys_segs NUMA domain %d!\n", domain));
3466 /* Get size of vm_page_array needed for this segment. */
3467 size = btoc(vm_phys_segs[i].end - vm_phys_segs[i].start);
3468 dom_pages[domain] += size;
3469 }
3470
3471 for (i = 0; phys_avail[i + 1] != 0; i+= 2) {
3472 domain = vm_phys_domain(phys_avail[i]);
3473 KASSERT(domain < MAXMEMDOM,
3474 ("Invalid phys_avail NUMA domain %d!\n", domain));
3475 size = btoc(phys_avail[i + 1] - phys_avail[i]);
3476 dom_pages[domain] += size;
3477 }
3478
3479 /*
3480 * Map in chunks that can get us all 16MB pages. There will be some
3481 * overlap between domains, but that's acceptable for now.
3482 */
3483 vm_page_array_size = 0;
3484 va = vm_page_base;
3485 for (i = 0; i < MAXMEMDOM && vm_page_array_size < pages; i++) {
3486 if (dom_pages[i] == 0)
3487 continue;
3488 size = ulmin(pages - vm_page_array_size, dom_pages[i]);
3489 size = round_page(size * sizeof(struct vm_page));
3490 needed = size;
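		/*
		 * Round the chunk up to a large-page multiple so that
		 * moea64_map_range() can back it entirely with 16MB pages.
		 */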
3491 size = roundup2(size, moea64_large_page_size);
3492 pa = vm_phys_early_alloc(i, size);
3493 vm_page_array_size += size / sizeof(struct vm_page);
3494 moea64_map_range(va, pa, size >> PAGE_SHIFT);
3495 /* Scoot up domain 0, to reduce the domain page overlap. */
3496 if (i == 0)
3497 vm_page_base += size - needed;
3498 va += size;
3499 }
3500 vm_page_array = (vm_page_t)vm_page_base;
3501 vm_page_array_size = pages;
3502 }
3503 #endif
3504
3505 static int64_t
3506 moea64_null_method(void)
3507 {
3508 return (0);
3509 }
3510
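/*
 * Generic pte_replace fallback: evict the old PTE and insert a fresh one,
 * returning the REF/CHG bits accumulated by the eviction.
 */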
3511 static int64_t
moea64_pte_replace_default(struct pvo_entry *pvo, int flags)
3512 {
3513 int64_t refchg;
3514
3515 refchg = moea64_pte_unset(pvo);
3516 moea64_pte_insert(pvo);
3517
3518 return (refchg);
3519 }
3520
3521 struct moea64_funcs *moea64_ops;
3522
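/*
 * Resolve each moea64_<func> ifunc to the handler installed in moea64_ops,
 * falling back to 'def' when no handler is registered.
 */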
3523 #define DEFINE_OEA64_IFUNC(ret, func, args, def) \
3524 DEFINE_IFUNC(, ret, moea64_##func, args) { \
3525 moea64_##func##_t f; \
3526 if (moea64_ops == NULL) \
3527 return ((moea64_##func##_t)def); \
3528 f = moea64_ops->func; \
3529 return (f != NULL ? f : (moea64_##func##_t)def);\
3530 }
3531
3532 void
3533 moea64_install(void)
3534 {
3535 #ifdef __powerpc64__
3536 if (hw_direct_map == -1) {
3537 moea64_probe_large_page();
3538
3539 /* Use a direct map if we have large page support */
3540 if (moea64_large_page_size > 0)
3541 hw_direct_map = 1;
3542 else
3543 hw_direct_map = 0;
3544 }
3545 #endif
3546
3547 /*
3548 * Default to non-DMAP, and switch over to DMAP functions once we know
3549 * we have DMAP.
3550 */
3551 if (hw_direct_map) {
3552 moea64_methods.quick_enter_page = moea64_quick_enter_page_dmap;
3553 moea64_methods.quick_remove_page = NULL;
3554 moea64_methods.copy_page = moea64_copy_page_dmap;
3555 moea64_methods.zero_page = moea64_zero_page_dmap;
3556 moea64_methods.copy_pages = moea64_copy_pages_dmap;
3557 }
3558 }
3559
3560 DEFINE_OEA64_IFUNC(int64_t, pte_replace, (struct pvo_entry *, int),
3561 moea64_pte_replace_default)
3562 DEFINE_OEA64_IFUNC(int64_t, pte_insert, (struct pvo_entry *), moea64_null_method)
3563 DEFINE_OEA64_IFUNC(int64_t, pte_unset, (struct pvo_entry *), moea64_null_method)
3564 DEFINE_OEA64_IFUNC(int64_t, pte_clear, (struct pvo_entry *, uint64_t),
3565 moea64_null_method)
3566 DEFINE_OEA64_IFUNC(int64_t, pte_synch, (struct pvo_entry *), moea64_null_method)
3567 DEFINE_OEA64_IFUNC(int64_t, pte_insert_sp, (struct pvo_entry *), moea64_null_method)
3568 DEFINE_OEA64_IFUNC(int64_t, pte_unset_sp, (struct pvo_entry *), moea64_null_method)
3569 DEFINE_OEA64_IFUNC(int64_t, pte_replace_sp, (struct pvo_entry *), moea64_null_method)
3570
3571 /* Superpage functions */
3572
3573 /* MMU interface */
3574
3575 static bool
3576 moea64_ps_enabled(pmap_t pmap)
3577 {
3578 return (superpages_enabled);
3579 }
3580
3581 static void
3582 moea64_align_superpage(vm_object_t object, vm_ooffset_t offset,
3583 vm_offset_t *addr, vm_size_t size)
3584 {
3585 vm_offset_t sp_offset;
3586
3587 if (size < HPT_SP_SIZE)
3588 return;
3589
3590 CTR4(KTR_PMAP, "%s: offs=%#jx, addr=%p, size=%#jx",
3591 __func__, (uintmax_t)offset, addr, (uintmax_t)size);
3592
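	/*
	 * Nudge *addr so that its offset within a superpage matches the
	 * object's superpage offset, making later promotion possible.
	 */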
3593 if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3594 offset += ptoa(object->pg_color);
3595 sp_offset = offset & HPT_SP_MASK;
3596 if (size - ((HPT_SP_SIZE - sp_offset) & HPT_SP_MASK) < HPT_SP_SIZE ||
3597 (*addr & HPT_SP_MASK) == sp_offset)
3598 return;
3599 if ((*addr & HPT_SP_MASK) < sp_offset)
3600 *addr = (*addr & ~HPT_SP_MASK) + sp_offset;
3601 else
3602 *addr = ((*addr + HPT_SP_MASK) & ~HPT_SP_MASK) + sp_offset;
3603 }
3604
3605 /* Helpers */
3606
3607 static __inline void
3608 moea64_pvo_cleanup(struct pvo_dlist *tofree)
3609 {
3610 struct pvo_entry *pvo;
3611
3612 /* clean up */
3613 while (!SLIST_EMPTY(tofree)) {
3614 pvo = SLIST_FIRST(tofree);
3615 SLIST_REMOVE_HEAD(tofree, pvo_dlink);
3616 if (pvo->pvo_vaddr & PVO_DEAD)
3617 moea64_pvo_remove_from_page(pvo);
3618 free_pvo_entry(pvo);
3619 }
3620 }
3621
3622 static __inline uint16_t
3623 pvo_to_vmpage_flags(struct pvo_entry *pvo)
3624 {
3625 uint16_t flags;
3626
3627 flags = 0;
3628 if ((pvo->pvo_pte.prot & VM_PROT_WRITE) != 0)
3629 flags |= PGA_WRITEABLE;
3630 if ((pvo->pvo_pte.prot & VM_PROT_EXECUTE) != 0)
3631 flags |= PGA_EXECUTABLE;
3632
3633 return (flags);
3634 }
3635
3636 /*
3637 * Check if the given pvo and its superpage are in sva-eva range.
3638 */
3639 static __inline bool
3640 moea64_sp_pvo_in_range(struct pvo_entry *pvo, vm_offset_t sva, vm_offset_t eva)
3641 {
3642 vm_offset_t spva;
3643
3644 spva = PVO_VADDR(pvo) & ~HPT_SP_MASK;
3645 if (spva >= sva && spva + HPT_SP_SIZE <= eva) {
3646 /*
3647 * Because this function is intended to be called from loops
3648 * that iterate over ordered pvo entries, if the condition
3649 * above is true then the pvo must be the first of its
3650 * superpage.
3651 */
3652 KASSERT(PVO_VADDR(pvo) == spva,
3653 ("%s: unexpected unaligned superpage pvo", __func__));
3654 return (true);
3655 }
3656 return (false);
3657 }
3658
3659 /*
3660 * Update vm about the REF/CHG bits if the superpage is managed and
3661 * has (or had) write access.
3662 */
3663 static void
3664 moea64_sp_refchg_process(struct pvo_entry *sp, vm_page_t m,
3665 int64_t sp_refchg, vm_prot_t prot)
3666 {
3667 vm_page_t m_end;
3668 int64_t refchg;
3669
3670 if ((sp->pvo_vaddr & PVO_MANAGED) != 0 && (prot & VM_PROT_WRITE) != 0) {
3671 for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++) {
3672 refchg = sp_refchg |
3673 atomic_readandclear_32(&m->md.mdpg_attrs);
3674 if (refchg & LPTE_CHG)
3675 vm_page_dirty(m);
3676 if (refchg & LPTE_REF)
3677 vm_page_aflag_set(m, PGA_REFERENCED);
3678 }
3679 }
3680 }
3681
3682 /* Superpage ops */
3683
3684 static int
3685 moea64_sp_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
3686 vm_prot_t prot, u_int flags, int8_t psind)
3687 {
3688 struct pvo_entry *pvo, **pvos;
3689 struct pvo_head *pvo_head;
3690 vm_offset_t sva;
3691 vm_page_t sm;
3692 vm_paddr_t pa, spa;
3693 bool sync;
3694 struct pvo_dlist tofree;
3695 int error __diagused, i;
3696 uint16_t aflags;
3697
3698 KASSERT((va & HPT_SP_MASK) == 0, ("%s: va %#jx unaligned",
3699 __func__, (uintmax_t)va));
3700 KASSERT(psind == 1, ("%s: invalid psind: %d", __func__, psind));
3701 KASSERT(m->psind == 1, ("%s: invalid m->psind: %d",
3702 __func__, m->psind));
3703 KASSERT(pmap != kernel_pmap,
3704 ("%s: function called with kernel pmap", __func__));
3705
3706 CTR5(KTR_PMAP, "%s: va=%#jx, pa=%#jx, prot=%#x, flags=%#x, psind=1",
3707 __func__, (uintmax_t)va, (uintmax_t)VM_PAGE_TO_PHYS(m),
3708 prot, flags);
3709
3710 SLIST_INIT(&tofree);
3711
3712 sva = va;
3713 sm = m;
3714 spa = pa = VM_PAGE_TO_PHYS(sm);
3715
3716 /* Try to allocate all PVOs first, to make failure handling easier. */
3717 pvos = malloc(HPT_SP_PAGES * sizeof(struct pvo_entry *), M_TEMP,
3718 M_NOWAIT);
3719 if (pvos == NULL) {
3720 CTR1(KTR_PMAP, "%s: failed to alloc pvo array", __func__);
3721 return (KERN_RESOURCE_SHORTAGE);
3722 }
3723
3724 for (i = 0; i < HPT_SP_PAGES; i++) {
3725 pvos[i] = alloc_pvo_entry(0);
3726 if (pvos[i] == NULL) {
3727 CTR1(KTR_PMAP, "%s: failed to alloc pvo", __func__);
3728 for (i = i - 1; i >= 0; i--)
3729 free_pvo_entry(pvos[i]);
3730 free(pvos, M_TEMP);
3731 return (KERN_RESOURCE_SHORTAGE);
3732 }
3733 }
3734
3735 SP_PV_LOCK_ALIGNED(spa);
3736 PMAP_LOCK(pmap);
3737
3738 /* Note: moea64_remove_locked() also clears cached REF/CHG bits. */
3739 moea64_remove_locked(pmap, va, va + HPT_SP_SIZE, &tofree);
3740
3741 /* Enter pages */
3742 for (i = 0; i < HPT_SP_PAGES;
3743 i++, va += PAGE_SIZE, pa += PAGE_SIZE, m++) {
3744 pvo = pvos[i];
3745
3746 pvo->pvo_pte.prot = prot;
3747 pvo->pvo_pte.pa = (pa & ~HPT_SP_MASK) | LPTE_LP_4K_16M |
3748 moea64_calc_wimg(pa, pmap_page_get_memattr(m));
3749
3750 if ((flags & PMAP_ENTER_WIRED) != 0)
3751 pvo->pvo_vaddr |= PVO_WIRED;
3752 pvo->pvo_vaddr |= PVO_LARGE;
3753
3754 if ((m->oflags & VPO_UNMANAGED) != 0)
3755 pvo_head = NULL;
3756 else {
3757 pvo_head = &m->md.mdpg_pvoh;
3758 pvo->pvo_vaddr |= PVO_MANAGED;
3759 }
3760
3761 init_pvo_entry(pvo, pmap, va);
3762
3763 error = moea64_pvo_enter(pvo, pvo_head, NULL);
3764 /*
3765 * All superpage PVOs were previously removed, so no errors
3766 * should occur while inserting the new ones.
3767 */
3768 KASSERT(error == 0, ("%s: unexpected error "
3769 "when inserting superpage PVO: %d",
3770 __func__, error));
3771 }
3772
3773 PMAP_UNLOCK(pmap);
3774 SP_PV_UNLOCK_ALIGNED(spa);
3775
3776 sync = (sm->a.flags & PGA_EXECUTABLE) == 0;
3777 /* Note: moea64_pvo_cleanup() also clears page prot. flags. */
3778 moea64_pvo_cleanup(&tofree);
3779 pvo = pvos[0];
3780
3781 /* Set vm page flags */
3782 aflags = pvo_to_vmpage_flags(pvo);
3783 if (aflags != 0)
3784 for (m = sm; m < &sm[HPT_SP_PAGES]; m++)
3785 vm_page_aflag_set(m, aflags);
3786
3787 /*
3788 * Flush the page from the instruction cache if this page is
3789 * mapped executable and cacheable.
3790 */
3791 if (sync && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0)
3792 moea64_syncicache(pmap, sva, spa, HPT_SP_SIZE);
3793
3794 atomic_add_long(&sp_mappings, 1);
3795 CTR3(KTR_PMAP, "%s: SP success for va %#jx in pmap %p",
3796 __func__, (uintmax_t)sva, pmap);
3797
3798 free(pvos, M_TEMP);
3799 return (KERN_SUCCESS);
3800 }
3801
3802 #if VM_NRESERVLEVEL > 0
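/*
 * Attempt to promote the 4KB mappings of the 16MB-aligned region containing
 * 'va' to a single superpage mapping.  Promotion fails if any constituent
 * PVO is missing or dead, or if physical addresses, promotion-relevant PVO
 * flags, protections or WIMG bits are not uniform across the range.
 */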
3803 static void
3804 moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m)
3805 {
3806 struct pvo_entry *first, *pvo;
3807 vm_paddr_t pa, pa_end;
3808 vm_offset_t sva, va_end;
3809 int64_t sp_refchg;
3810
3811 /* This CTR may generate a lot of output. */
3812 /* CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)va); */
3813
3814 va &= ~HPT_SP_MASK;
3815 sva = va;
3816 /* Get superpage */
3817 pa = VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK;
3818 m = PHYS_TO_VM_PAGE(pa);
3819
3820 PMAP_LOCK(pmap);
3821
3822 /*
3823 * Check if all pages meet promotion criteria.
3824 *
3825 * XXX In some cases the loop below may be executed once for each (or
3826 * most) of the entered pages of a superpage, which can be expensive
3827 * (although it has not been profiled) and may need some optimization.
3828 *
3829 * Some cases where this seems to happen are:
3830 * - When a superpage is first entered read-only and later becomes
3831 * read-write.
3832 * - When some of the superpage's virtual addresses map to previously
3833 * wired/cached pages while others map to pages allocated from a
3834 * different physical address range. A common scenario where this
3835 * happens is when mmap'ing a file that is already present in FS
3836 * block cache and doesn't fill a superpage.
3837 */
3838 first = pvo = moea64_pvo_find_va(pmap, sva);
3839 for (pa_end = pa + HPT_SP_SIZE;
3840 pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE) {
3841 if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
3842 CTR3(KTR_PMAP,
3843 "%s: NULL or dead PVO: pmap=%p, va=%#jx",
3844 __func__, pmap, (uintmax_t)va);
3845 goto error;
3846 }
3847 if (PVO_PADDR(pvo) != pa) {
3848 CTR5(KTR_PMAP, "%s: PAs don't match: "
3849 "pmap=%p, va=%#jx, pvo_pa=%#jx, exp_pa=%#jx",
3850 __func__, pmap, (uintmax_t)va,
3851 (uintmax_t)PVO_PADDR(pvo), (uintmax_t)pa);
3852 atomic_add_long(&sp_p_fail_pa, 1);
3853 goto error;
3854 }
3855 if ((first->pvo_vaddr & PVO_FLAGS_PROMOTE) !=
3856 (pvo->pvo_vaddr & PVO_FLAGS_PROMOTE)) {
3857 CTR5(KTR_PMAP, "%s: PVO flags don't match: "
3858 "pmap=%p, va=%#jx, pvo_flags=%#jx, exp_flags=%#jx",
3859 __func__, pmap, (uintmax_t)va,
3860 (uintmax_t)(pvo->pvo_vaddr & PVO_FLAGS_PROMOTE),
3861 (uintmax_t)(first->pvo_vaddr & PVO_FLAGS_PROMOTE));
3862 atomic_add_long(&sp_p_fail_flags, 1);
3863 goto error;
3864 }
3865 if (first->pvo_pte.prot != pvo->pvo_pte.prot) {
3866 CTR5(KTR_PMAP, "%s: PVO protections don't match: "
3867 "pmap=%p, va=%#jx, pvo_prot=%#x, exp_prot=%#x",
3868 __func__, pmap, (uintmax_t)va,
3869 pvo->pvo_pte.prot, first->pvo_pte.prot);
3870 atomic_add_long(&sp_p_fail_prot, 1);
3871 goto error;
3872 }
3873 if ((first->pvo_pte.pa & LPTE_WIMG) !=
3874 (pvo->pvo_pte.pa & LPTE_WIMG)) {
3875 CTR5(KTR_PMAP, "%s: WIMG bits don't match: "
3876 "pmap=%p, va=%#jx, pvo_wimg=%#jx, exp_wimg=%#jx",
3877 __func__, pmap, (uintmax_t)va,
3878 (uintmax_t)(pvo->pvo_pte.pa & LPTE_WIMG),
3879 (uintmax_t)(first->pvo_pte.pa & LPTE_WIMG));
3880 atomic_add_long(&sp_p_fail_wimg, 1);
3881 goto error;
3882 }
3883
3884 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo);
3885 }
3886
3887 /* All OK, promote. */
3888
3889 /*
3890 * Handle superpage REF/CHG bits. If REF or CHG is set in
3891 * any page, then it must be set in the superpage.
3892 *
3893 * Instead of querying each page, we take advantage of two facts:
3894 * 1- If a page is being promoted, it was referenced.
3895 * 2- If promoted pages are writable, they were modified.
3896 */
3897 sp_refchg = LPTE_REF |
3898 ((first->pvo_pte.prot & VM_PROT_WRITE) != 0 ? LPTE_CHG : 0);
3899
3900 /* Promote pages */
3901
3902 for (pvo = first, va_end = PVO_VADDR(pvo) + HPT_SP_SIZE;
3903 pvo != NULL && PVO_VADDR(pvo) < va_end;
3904 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
3905 pvo->pvo_pte.pa &= ADDR_POFF | ~HPT_SP_MASK;
3906 pvo->pvo_pte.pa |= LPTE_LP_4K_16M;
3907 pvo->pvo_vaddr |= PVO_LARGE;
3908 }
3909 moea64_pte_replace_sp(first);
3910
3911 /* Send REF/CHG bits to VM */
3912 moea64_sp_refchg_process(first, m, sp_refchg, first->pvo_pte.prot);
3913
3914 /* Use first page to cache REF/CHG bits */
3915 atomic_set_32(&m->md.mdpg_attrs, sp_refchg | MDPG_ATTR_SP);
3916
3917 PMAP_UNLOCK(pmap);
3918
3919 atomic_add_long(&sp_mappings, 1);
3920 atomic_add_long(&sp_promotions, 1);
3921 CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
3922 __func__, (uintmax_t)sva, pmap);
3923 return;
3924
3925 error:
3926 atomic_add_long(&sp_p_failures, 1);
3927 PMAP_UNLOCK(pmap);
3928 }
3929 #endif
3930
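/*
 * Demote the superpage starting at PVO 'sp' back into 4KB mappings.
 * 'sp' must be the first (superpage-aligned) PVO and the pmap lock must
 * be held.
 */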
3931 static void
3932 moea64_sp_demote_aligned(struct pvo_entry *sp)
3933 {
3934 struct pvo_entry *pvo;
3935 vm_offset_t va, va_end;
3936 vm_paddr_t pa;
3937 vm_page_t m;
3938 pmap_t pmap __diagused;
3939 int64_t refchg;
3940
3941 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
3942
3943 pmap = sp->pvo_pmap;
3944 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
3945
3946 pvo = sp;
3947
3948 /* Demote pages */
3949
3950 va = PVO_VADDR(pvo);
3951 pa = PVO_PADDR(pvo);
3952 m = PHYS_TO_VM_PAGE(pa);
3953
3954 for (pvo = sp, va_end = va + HPT_SP_SIZE;
3955 pvo != NULL && PVO_VADDR(pvo) < va_end;
3956 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo),
3957 va += PAGE_SIZE, pa += PAGE_SIZE) {
3958 KASSERT(pvo && PVO_VADDR(pvo) == va,
3959 ("%s: missing PVO for va %#jx", __func__, (uintmax_t)va));
3960
3961 pvo->pvo_vaddr &= ~PVO_LARGE;
3962 pvo->pvo_pte.pa &= ~LPTE_RPGN;
3963 pvo->pvo_pte.pa |= pa;
3965 }
3966 refchg = moea64_pte_replace_sp(sp);
3967
3968 /*
3969 * Clear SP flag
3970 *
3971 * XXX It is possible that another pmap has this page mapped as
3972 * part of a superpage, but since the SP flag is only used to
3973 * cache SP REF/CHG bits, which are queried from the page table
3974 * when not cached, it should be OK to clear it here.
3975 */
3976 atomic_clear_32(&m->md.mdpg_attrs, MDPG_ATTR_SP);
3977
3978 /*
3979 * Handle superpage REF/CHG bits. A bit set in the superpage
3980 * means all pages should consider it set.
3981 */
3982 moea64_sp_refchg_process(sp, m, refchg, sp->pvo_pte.prot);
3983
3984 atomic_add_long(&sp_demotions, 1);
3985 CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
3986 __func__, (uintmax_t)PVO_VADDR(sp), pmap);
3987 }
3988
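/*
 * Demote the superpage containing 'pvo', looking up the first (aligned)
 * PVO of the superpage first if necessary.
 */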
3989 static void
3990 moea64_sp_demote(struct pvo_entry *pvo)
3991 {
3992 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
3993
3994 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
3995 pvo = moea64_pvo_find_va(pvo->pvo_pmap,
3996 PVO_VADDR(pvo) & ~HPT_SP_MASK);
3997 KASSERT(pvo != NULL, ("%s: missing PVO for va %#jx",
3998 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
3999 }
4000 moea64_sp_demote_aligned(pvo);
4001 }
4002
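/*
 * Clear PVO_WIRED on every 4KB PVO of the superpage starting at 'sp' and
 * update the pmap's wired count accordingly.  Returns the last PVO visited
 * so that the caller can continue iterating over the pmap's PVO tree.
 */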
4003 static struct pvo_entry *
4004 moea64_sp_unwire(struct pvo_entry *sp)
4005 {
4006 struct pvo_entry *pvo, *prev;
4007 vm_offset_t eva;
4008 pmap_t pm;
4009 int64_t ret, refchg;
4010
4011 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
4012
4013 pm = sp->pvo_pmap;
4014 PMAP_LOCK_ASSERT(pm, MA_OWNED);
4015
4016 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4017 refchg = 0;
4018 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4019 prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
4020 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
4021 panic("%s: pvo %p is missing PVO_WIRED",
4022 __func__, pvo);
4023 pvo->pvo_vaddr &= ~PVO_WIRED;
4024
4025 ret = moea64_pte_replace(pvo, 0 /* No invalidation */);
4026 if (ret < 0)
4027 refchg |= LPTE_CHG;
4028 else
4029 refchg |= ret;
4030
4031 pm->pm_stats.wired_count--;
4032 }
4033
4034 /* Send REF/CHG bits to VM */
4035 moea64_sp_refchg_process(sp, PHYS_TO_VM_PAGE(PVO_PADDR(sp)),
4036 refchg, sp->pvo_pte.prot);
4037
4038 return (prev);
4039 }
4040
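/*
 * Change the protection of the superpage starting at 'sp' to 'prot',
 * accumulating REF/CHG bits and, for pages that became executable, setting
 * PGA_EXECUTABLE and syncing the instruction cache.  Returns the last PVO
 * visited.
 */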
4041 static struct pvo_entry *
4042 moea64_sp_protect(struct pvo_entry *sp, vm_prot_t prot)
4043 {
4044 struct pvo_entry *pvo, *prev;
4045 vm_offset_t eva;
4046 pmap_t pm;
4047 vm_page_t m, m_end;
4048 int64_t ret, refchg;
4049 vm_prot_t oldprot;
4050
4051 CTR3(KTR_PMAP, "%s: va=%#jx, prot=%x",
4052 __func__, (uintmax_t)PVO_VADDR(sp), prot);
4053
4054 pm = sp->pvo_pmap;
4055 PMAP_LOCK_ASSERT(pm, MA_OWNED);
4056
4057 oldprot = sp->pvo_pte.prot;
4058 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4059 KASSERT(m != NULL, ("%s: missing vm page for pa %#jx",
4060 __func__, (uintmax_t)PVO_PADDR(sp)));
4061 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4062 refchg = 0;
4063
4064 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4065 prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
4066 pvo->pvo_pte.prot = prot;
4067 /*
4068 * If the PVO is in the page table, update mapping
4069 */
4070 ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
4071 if (ret < 0)
4072 refchg |= LPTE_CHG;
4073 else
4074 refchg |= ret;
4075 }
4076
4077 /* Send REF/CHG bits to VM */
4078 moea64_sp_refchg_process(sp, m, refchg, oldprot);
4079
4080 /* Handle pages that became executable */
4081 if ((m->a.flags & PGA_EXECUTABLE) == 0 &&
4082 (sp->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
4083 if ((m->oflags & VPO_UNMANAGED) == 0)
4084 for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++)
4085 vm_page_aflag_set(m, PGA_EXECUTABLE);
4086 moea64_syncicache(pm, PVO_VADDR(sp), PVO_PADDR(sp),
4087 HPT_SP_SIZE);
4088 }
4089
4090 return (prev);
4091 }
4092
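/*
 * Remove every PVO of the superpage starting at 'sp' from the page table
 * and pmap, queueing them on 'tofree' for later cleanup.  Returns the
 * first PVO past the end of the superpage.
 */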
4093 static struct pvo_entry *
4094 moea64_sp_remove(struct pvo_entry *sp, struct pvo_dlist *tofree)
4095 {
4096 struct pvo_entry *pvo, *tpvo;
4097 vm_offset_t eva;
4098 pmap_t pm __diagused;
4099
4100 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
4101
4102 pm = sp->pvo_pmap;
4103 PMAP_LOCK_ASSERT(pm, MA_OWNED);
4104
4105 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4106 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
4107 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
4108
4109 /*
4110 * For locking reasons, remove this from the page table and
4111 * pmap, but save delinking from the vm_page for a second
4112 * pass
4113 */
4114 moea64_pvo_remove_from_pmap(pvo);
4115 SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
4116 }
4117
4118 /*
4119 * Clear SP bit
4120 *
4121 * XXX See comment in moea64_sp_demote_aligned() for why it's
4122 * ok to always clear the SP bit on remove/demote.
4123 */
4124 atomic_clear_32(&PHYS_TO_VM_PAGE(PVO_PADDR(sp))->md.mdpg_attrs,
4125 MDPG_ATTR_SP);
4126
4127 return (tpvo);
4128 }
4129
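/*
 * With the pmap already locked, gather the REF/CHG bits of all 4KB PTEs
 * backing the superpage that contains 'pvo', stopping early once 'ptebit'
 * is found, and cache any bits found in the first page's md attributes.
 */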
4130 static int64_t
4131 moea64_sp_query_locked(struct pvo_entry *pvo, uint64_t ptebit)
4132 {
4133 int64_t refchg, ret;
4134 vm_offset_t eva;
4135 vm_page_t m;
4136 pmap_t pmap;
4137 struct pvo_entry *sp;
4138
4139 pmap = pvo->pvo_pmap;
4140 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4141
4142 /* Get first SP PVO */
4143 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
4144 sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
4145 KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
4146 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
4147 } else
4148 sp = pvo;
4149 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4150
4151 refchg = 0;
4152 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4153 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
4154 ret = moea64_pte_synch(pvo);
4155 if (ret > 0) {
4156 refchg |= ret & (LPTE_CHG | LPTE_REF);
4157 if ((refchg & ptebit) != 0)
4158 break;
4159 }
4160 }
4161
4162 /* Save results */
4163 if (refchg != 0) {
4164 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4165 atomic_set_32(&m->md.mdpg_attrs, refchg | MDPG_ATTR_SP);
4166 }
4167
4168 return (refchg);
4169 }
4170
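/*
 * Locking wrapper around moea64_sp_query_locked().  Returns -1 if the
 * superpage was demoted or removed before the pmap lock was acquired.
 */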
4171 static int64_t
4172 moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit)
4173 {
4174 int64_t refchg;
4175 pmap_t pmap;
4176
4177 pmap = pvo->pvo_pmap;
4178 PMAP_LOCK(pmap);
4179
4180 /*
4181 * Check if SP was demoted/removed before pmap lock was acquired.
4182 */
4183 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4184 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4185 __func__, (uintmax_t)PVO_PADDR(pvo));
4186 PMAP_UNLOCK(pmap);
4187 return (-1);
4188 }
4189
4190 refchg = moea64_sp_query_locked(pvo, ptebit);
4191 PMAP_UNLOCK(pmap);
4192
4193 CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
4194 __func__, (uintmax_t)PVO_VADDR(pvo),
4195 (uintmax_t)PVO_PADDR(pvo), (uintmax_t)refchg);
4196
4197 return (refchg);
4198 }
4199
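/*
 * Clear 'ptebit' in every 4KB PTE of the superpage containing 'pvo' and in
 * the cached attributes of its first page.  Returns the accumulated REF/CHG
 * bits, or -1 if the superpage was demoted or removed before the pmap lock
 * was acquired.
 */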
4200 static int64_t
4201 moea64_sp_pvo_clear(struct pvo_entry *pvo, uint64_t ptebit)
4202 {
4203 int64_t refchg, ret;
4204 pmap_t pmap;
4205 struct pvo_entry *sp;
4206 vm_offset_t eva;
4207 vm_page_t m;
4208
4209 pmap = pvo->pvo_pmap;
4210 PMAP_LOCK(pmap);
4211
4212 /*
4213 * Check if SP was demoted/removed before pmap lock was acquired.
4214 */
4215 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4216 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4217 __func__, (uintmax_t)PVO_PADDR(pvo));
4218 PMAP_UNLOCK(pmap);
4219 return (-1);
4220 }
4221
4222 /* Get first SP PVO */
4223 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
4224 sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
4225 KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
4226 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
4227 } else
4228 sp = pvo;
4229 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4230
4231 refchg = 0;
4232 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4233 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
4234 ret = moea64_pte_clear(pvo, ptebit);
4235 if (ret > 0)
4236 refchg |= ret & (LPTE_CHG | LPTE_REF);
4237 }
4238
4239 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4240 atomic_clear_32(&m->md.mdpg_attrs, ptebit);
4241 PMAP_UNLOCK(pmap);
4242
4243 CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
4244 __func__, (uintmax_t)PVO_VADDR(sp),
4245 (uintmax_t)PVO_PADDR(sp), (uintmax_t)refchg);
4246
4247 return (refchg);
4248 }
4249
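/*
 * Clear 'ptebit' for the 4KB page mapped by 'pvo' within a superpage.
 * For LPTE_REF, the superpage-wide bit is cleared only when 'pvo' maps the
 * pseudo-randomly selected page of the superpage and the mapping is not
 * wired; otherwise the bit is simply reported as set.  For LPTE_CHG, the
 * superpage is demoted first, so that only the single 4KB mapping needs to
 * be cleared and, if not wired, write-protected to allow repromotion on a
 * subsequent write access.
 */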
4250 static int64_t
4251 moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m, uint64_t ptebit)
4252 {
4253 int64_t count, ret;
4254 pmap_t pmap;
4255
4256 count = 0;
4257 pmap = pvo->pvo_pmap;
4258
4259 /*
4260 * Since this reference bit is shared by 4096 4KB pages, it
4261 * should not be cleared every time it is tested. Apply a
4262 * simple "hash" function on the physical page number, the
4263 * virtual superpage number, and the pmap address to select
4264 * one 4KB page out of the 4096 on which testing the
4265 * reference bit will result in clearing that reference bit.
4266 * This function is designed to avoid the selection of the
4267 * same 4KB page for every 16MB page mapping.
4268 *
4269 * Always leave the reference bit of a wired mapping set, as
4270 * the current state of its reference bit won't affect page
4271 * replacement.
4272 */
4273 if (ptebit == LPTE_REF && (((VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) ^
4274 (PVO_VADDR(pvo) >> HPT_SP_SHIFT) ^ (uintptr_t)pmap) &
4275 (HPT_SP_PAGES - 1)) == 0 && (pvo->pvo_vaddr & PVO_WIRED) == 0) {
4276 if ((ret = moea64_sp_pvo_clear(pvo, ptebit)) == -1)
4277 return (-1);
4278
4279 if ((ret & ptebit) != 0)
4280 count++;
4281
4282 /*
4283 * If this page was not selected by the hash function, then assume
4284 * its REF bit was set.
4285 */
4286 } else if (ptebit == LPTE_REF) {
4287 count++;
4288
4289 /*
4290 * To clear the CHG bit of a single SP page, first it must be demoted.
4291 * But if no CHG bit is set, no bit clear and thus no SP demotion is
4292 * needed.
4293 */
4294 } else {
4295 CTR4(KTR_PMAP, "%s: ptebit=%#jx, va=%#jx, pa=%#jx",
4296 __func__, (uintmax_t)ptebit, (uintmax_t)PVO_VADDR(pvo),
4297 (uintmax_t)PVO_PADDR(pvo));
4298
4299 PMAP_LOCK(pmap);
4300
4301 /*
4302 * Make sure SP wasn't demoted/removed before pmap lock
4303 * was acquired.
4304 */
4305 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4306 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4307 __func__, (uintmax_t)PVO_PADDR(pvo));
4308 PMAP_UNLOCK(pmap);
4309 return (-1);
4310 }
4311
4312 ret = moea64_sp_query_locked(pvo, ptebit);
4313 if ((ret & ptebit) != 0)
4314 count++;
4315 else {
4316 PMAP_UNLOCK(pmap);
4317 return (0);
4318 }
4319
4320 moea64_sp_demote(pvo);
4321 moea64_pte_clear(pvo, ptebit);
4322
4323 /*
4324 * Write protect the mapping to a single page so that a
4325 * subsequent write access may repromote.
4326 */
4327 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
4328 moea64_pvo_protect(pmap, pvo,
4329 pvo->pvo_pte.prot & ~VM_PROT_WRITE);
4330
4331 PMAP_UNLOCK(pmap);
4332 }
4333
4334 return (count);
4335 }
4336