1 /*- 2 * Copyright (c) 2008-2015 Nathan Whitehorn 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 /* 31 * Manages physical address maps. 32 * 33 * Since the information managed by this module is also stored by the 34 * logical address mapping module, this module may throw away valid virtual 35 * to physical mappings at almost any time. However, invalidations of 36 * mappings must be done as requested. 37 * 38 * In order to cope with hardware architectures which make virtual to 39 * physical map invalidates expensive, this module may delay invalidate 40 * reduced protection operations until such time as they are actually 41 * necessary. This module is given full information as to which processors 42 * are currently using which maps, and to when physical maps must be made 43 * correct. 44 */ 45 46 #include "opt_compat.h" 47 #include "opt_kstack_pages.h" 48 49 #include <sys/param.h> 50 #include <sys/kernel.h> 51 #include <sys/conf.h> 52 #include <sys/queue.h> 53 #include <sys/cpuset.h> 54 #include <sys/kerneldump.h> 55 #include <sys/ktr.h> 56 #include <sys/lock.h> 57 #include <sys/msgbuf.h> 58 #include <sys/malloc.h> 59 #include <sys/mutex.h> 60 #include <sys/proc.h> 61 #include <sys/rwlock.h> 62 #include <sys/sched.h> 63 #include <sys/sysctl.h> 64 #include <sys/systm.h> 65 #include <sys/vmmeter.h> 66 67 #include <sys/kdb.h> 68 69 #include <dev/ofw/openfirm.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_param.h> 73 #include <vm/vm_kern.h> 74 #include <vm/vm_page.h> 75 #include <vm/vm_map.h> 76 #include <vm/vm_object.h> 77 #include <vm/vm_extern.h> 78 #include <vm/vm_pageout.h> 79 #include <vm/uma.h> 80 81 #include <machine/_inttypes.h> 82 #include <machine/cpu.h> 83 #include <machine/platform.h> 84 #include <machine/frame.h> 85 #include <machine/md_var.h> 86 #include <machine/psl.h> 87 #include <machine/bat.h> 88 #include <machine/hid.h> 89 #include <machine/pte.h> 90 #include <machine/sr.h> 91 #include <machine/trap.h> 92 #include <machine/mmuvar.h> 93 94 #include "mmu_oea64.h" 95 #include "mmu_if.h" 96 #include "moea64_if.h" 97 98 void moea64_release_vsid(uint64_t vsid); 99 uintptr_t moea64_get_unique_vsid(void); 100 101 #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) 102 #define ENABLE_TRANS(msr) mtmsr(msr) 103 104 #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) 105 #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) 106 #define VSID_HASH_MASK 0x0000007fffffffffULL 107 108 /* 109 * Locking semantics: 110 * 111 * There are two locks of interest: the page locks and the pmap locks, which 112 * protect their individual PVO lists and are locked in that order. The contents 113 * of all PVO entries are protected by the locks of their respective pmaps. 114 * The pmap of any PVO is guaranteed not to change so long as the PVO is linked 115 * into any list. 116 * 117 */ 118 119 #define PV_LOCK_COUNT PA_LOCK_COUNT*3 120 static struct mtx_padalign pv_lock[PV_LOCK_COUNT]; 121 122 #define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[pa_index(pa) % PV_LOCK_COUNT])) 123 #define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa)) 124 #define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa)) 125 #define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED) 126 #define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m)) 127 #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) 128 #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) 129 130 struct ofw_map { 131 cell_t om_va; 132 cell_t om_len; 133 uint64_t om_pa; 134 cell_t om_mode; 135 }; 136 137 extern unsigned char _etext[]; 138 extern unsigned char _end[]; 139 140 extern int ofw_real_mode; 141 142 /* 143 * Map of physical memory regions. 144 */ 145 static struct mem_region *regions; 146 static struct mem_region *pregions; 147 static u_int phys_avail_count; 148 static int regions_sz, pregions_sz; 149 150 extern void bs_remap_earlyboot(void); 151 152 /* 153 * Lock for the SLB tables. 154 */ 155 struct mtx moea64_slb_mutex; 156 157 /* 158 * PTEG data. 159 */ 160 u_int moea64_pteg_count; 161 u_int moea64_pteg_mask; 162 163 /* 164 * PVO data. 165 */ 166 167 uma_zone_t moea64_pvo_zone; /* zone for pvo entries */ 168 169 static struct pvo_entry *moea64_bpvo_pool; 170 static int moea64_bpvo_pool_index = 0; 171 static int moea64_bpvo_pool_size = 327680; 172 TUNABLE_INT("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size); 173 SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD, 174 &moea64_bpvo_pool_index, 0, ""); 175 176 #define VSID_NBPW (sizeof(u_int32_t) * 8) 177 #ifdef __powerpc64__ 178 #define NVSIDS (NPMAPS * 16) 179 #define VSID_HASHMASK 0xffffffffUL 180 #else 181 #define NVSIDS NPMAPS 182 #define VSID_HASHMASK 0xfffffUL 183 #endif 184 static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; 185 186 static boolean_t moea64_initialized = FALSE; 187 188 /* 189 * Statistics. 190 */ 191 u_int moea64_pte_valid = 0; 192 u_int moea64_pte_overflow = 0; 193 u_int moea64_pvo_entries = 0; 194 u_int moea64_pvo_enter_calls = 0; 195 u_int moea64_pvo_remove_calls = 0; 196 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, 197 &moea64_pte_valid, 0, ""); 198 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, 199 &moea64_pte_overflow, 0, ""); 200 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, 201 &moea64_pvo_entries, 0, ""); 202 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, 203 &moea64_pvo_enter_calls, 0, ""); 204 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, 205 &moea64_pvo_remove_calls, 0, ""); 206 207 vm_offset_t moea64_scratchpage_va[2]; 208 struct pvo_entry *moea64_scratchpage_pvo[2]; 209 struct mtx moea64_scratchpage_mtx; 210 211 uint64_t moea64_large_page_mask = 0; 212 uint64_t moea64_large_page_size = 0; 213 int moea64_large_page_shift = 0; 214 215 /* 216 * PVO calls. 217 */ 218 static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, 219 struct pvo_head *pvo_head); 220 static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo); 221 static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo); 222 static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); 223 224 /* 225 * Utility routines. 226 */ 227 static boolean_t moea64_query_bit(mmu_t, vm_page_t, uint64_t); 228 static u_int moea64_clear_bit(mmu_t, vm_page_t, uint64_t); 229 static void moea64_kremove(mmu_t, vm_offset_t); 230 static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, 231 vm_offset_t pa, vm_size_t sz); 232 233 /* 234 * Kernel MMU interface 235 */ 236 void moea64_clear_modify(mmu_t, vm_page_t); 237 void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); 238 void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 239 vm_page_t *mb, vm_offset_t b_offset, int xfersize); 240 int moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, 241 u_int flags, int8_t psind); 242 void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, 243 vm_prot_t); 244 void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); 245 vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); 246 vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); 247 void moea64_init(mmu_t); 248 boolean_t moea64_is_modified(mmu_t, vm_page_t); 249 boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 250 boolean_t moea64_is_referenced(mmu_t, vm_page_t); 251 int moea64_ts_referenced(mmu_t, vm_page_t); 252 vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); 253 boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); 254 int moea64_page_wired_mappings(mmu_t, vm_page_t); 255 void moea64_pinit(mmu_t, pmap_t); 256 void moea64_pinit0(mmu_t, pmap_t); 257 void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); 258 void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 259 void moea64_qremove(mmu_t, vm_offset_t, int); 260 void moea64_release(mmu_t, pmap_t); 261 void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 262 void moea64_remove_pages(mmu_t, pmap_t); 263 void moea64_remove_all(mmu_t, vm_page_t); 264 void moea64_remove_write(mmu_t, vm_page_t); 265 void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 266 void moea64_zero_page(mmu_t, vm_page_t); 267 void moea64_zero_page_area(mmu_t, vm_page_t, int, int); 268 void moea64_zero_page_idle(mmu_t, vm_page_t); 269 void moea64_activate(mmu_t, struct thread *); 270 void moea64_deactivate(mmu_t, struct thread *); 271 void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); 272 void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t); 273 void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); 274 vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); 275 void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); 276 void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma); 277 void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); 278 boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 279 static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); 280 void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, 281 void **va); 282 void moea64_scan_init(mmu_t mmu); 283 284 static mmu_method_t moea64_methods[] = { 285 MMUMETHOD(mmu_clear_modify, moea64_clear_modify), 286 MMUMETHOD(mmu_copy_page, moea64_copy_page), 287 MMUMETHOD(mmu_copy_pages, moea64_copy_pages), 288 MMUMETHOD(mmu_enter, moea64_enter), 289 MMUMETHOD(mmu_enter_object, moea64_enter_object), 290 MMUMETHOD(mmu_enter_quick, moea64_enter_quick), 291 MMUMETHOD(mmu_extract, moea64_extract), 292 MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), 293 MMUMETHOD(mmu_init, moea64_init), 294 MMUMETHOD(mmu_is_modified, moea64_is_modified), 295 MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), 296 MMUMETHOD(mmu_is_referenced, moea64_is_referenced), 297 MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), 298 MMUMETHOD(mmu_map, moea64_map), 299 MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), 300 MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), 301 MMUMETHOD(mmu_pinit, moea64_pinit), 302 MMUMETHOD(mmu_pinit0, moea64_pinit0), 303 MMUMETHOD(mmu_protect, moea64_protect), 304 MMUMETHOD(mmu_qenter, moea64_qenter), 305 MMUMETHOD(mmu_qremove, moea64_qremove), 306 MMUMETHOD(mmu_release, moea64_release), 307 MMUMETHOD(mmu_remove, moea64_remove), 308 MMUMETHOD(mmu_remove_pages, moea64_remove_pages), 309 MMUMETHOD(mmu_remove_all, moea64_remove_all), 310 MMUMETHOD(mmu_remove_write, moea64_remove_write), 311 MMUMETHOD(mmu_sync_icache, moea64_sync_icache), 312 MMUMETHOD(mmu_unwire, moea64_unwire), 313 MMUMETHOD(mmu_zero_page, moea64_zero_page), 314 MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), 315 MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), 316 MMUMETHOD(mmu_activate, moea64_activate), 317 MMUMETHOD(mmu_deactivate, moea64_deactivate), 318 MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), 319 320 /* Internal interfaces */ 321 MMUMETHOD(mmu_mapdev, moea64_mapdev), 322 MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), 323 MMUMETHOD(mmu_unmapdev, moea64_unmapdev), 324 MMUMETHOD(mmu_kextract, moea64_kextract), 325 MMUMETHOD(mmu_kenter, moea64_kenter), 326 MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), 327 MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), 328 MMUMETHOD(mmu_scan_init, moea64_scan_init), 329 MMUMETHOD(mmu_dumpsys_map, moea64_dumpsys_map), 330 331 { 0, 0 } 332 }; 333 334 MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); 335 336 static struct pvo_head * 337 vm_page_to_pvoh(vm_page_t m) 338 { 339 340 mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED); 341 return (&m->md.mdpg_pvoh); 342 } 343 344 static struct pvo_entry * 345 alloc_pvo_entry(int bootstrap) 346 { 347 struct pvo_entry *pvo; 348 349 if (!moea64_initialized || bootstrap) { 350 if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) { 351 panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", 352 moea64_bpvo_pool_index, moea64_bpvo_pool_size, 353 moea64_bpvo_pool_size * sizeof(struct pvo_entry)); 354 } 355 pvo = &moea64_bpvo_pool[ 356 atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)]; 357 bzero(pvo, sizeof(*pvo)); 358 pvo->pvo_vaddr = PVO_BOOTSTRAP; 359 } else { 360 pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT); 361 bzero(pvo, sizeof(*pvo)); 362 } 363 364 return (pvo); 365 } 366 367 368 static void 369 init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va) 370 { 371 uint64_t vsid; 372 uint64_t hash; 373 int shift; 374 375 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 376 377 pvo->pvo_pmap = pmap; 378 va &= ~ADDR_POFF; 379 pvo->pvo_vaddr |= va; 380 vsid = va_to_vsid(pmap, va); 381 pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) 382 | (vsid << 16); 383 384 shift = (pvo->pvo_vaddr & PVO_LARGE) ? moea64_large_page_shift : 385 ADDR_PIDX_SHFT; 386 hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); 387 pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; 388 } 389 390 static void 391 free_pvo_entry(struct pvo_entry *pvo) 392 { 393 394 if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) 395 uma_zfree(moea64_pvo_zone, pvo); 396 } 397 398 void 399 moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte) 400 { 401 402 lpte->pte_hi = (pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & 403 LPTE_AVPN_MASK; 404 lpte->pte_hi |= LPTE_VALID; 405 406 if (pvo->pvo_vaddr & PVO_LARGE) 407 lpte->pte_hi |= LPTE_BIG; 408 if (pvo->pvo_vaddr & PVO_WIRED) 409 lpte->pte_hi |= LPTE_WIRED; 410 if (pvo->pvo_vaddr & PVO_HID) 411 lpte->pte_hi |= LPTE_HID; 412 413 lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */ 414 if (pvo->pvo_pte.prot & VM_PROT_WRITE) 415 lpte->pte_lo |= LPTE_BW; 416 else 417 lpte->pte_lo |= LPTE_BR; 418 419 if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE)) 420 lpte->pte_lo |= LPTE_NOEXEC; 421 } 422 423 static __inline uint64_t 424 moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma) 425 { 426 uint64_t pte_lo; 427 int i; 428 429 if (ma != VM_MEMATTR_DEFAULT) { 430 switch (ma) { 431 case VM_MEMATTR_UNCACHEABLE: 432 return (LPTE_I | LPTE_G); 433 case VM_MEMATTR_WRITE_COMBINING: 434 case VM_MEMATTR_WRITE_BACK: 435 case VM_MEMATTR_PREFETCHABLE: 436 return (LPTE_I); 437 case VM_MEMATTR_WRITE_THROUGH: 438 return (LPTE_W | LPTE_M); 439 } 440 } 441 442 /* 443 * Assume the page is cache inhibited and access is guarded unless 444 * it's in our available memory array. 445 */ 446 pte_lo = LPTE_I | LPTE_G; 447 for (i = 0; i < pregions_sz; i++) { 448 if ((pa >= pregions[i].mr_start) && 449 (pa < (pregions[i].mr_start + pregions[i].mr_size))) { 450 pte_lo &= ~(LPTE_I | LPTE_G); 451 pte_lo |= LPTE_M; 452 break; 453 } 454 } 455 456 return pte_lo; 457 } 458 459 /* 460 * Quick sort callout for comparing memory regions. 461 */ 462 static int om_cmp(const void *a, const void *b); 463 464 static int 465 om_cmp(const void *a, const void *b) 466 { 467 const struct ofw_map *mapa; 468 const struct ofw_map *mapb; 469 470 mapa = a; 471 mapb = b; 472 if (mapa->om_pa < mapb->om_pa) 473 return (-1); 474 else if (mapa->om_pa > mapb->om_pa) 475 return (1); 476 else 477 return (0); 478 } 479 480 static void 481 moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) 482 { 483 struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */ 484 pcell_t acells, trans_cells[sz/sizeof(cell_t)]; 485 struct pvo_entry *pvo; 486 register_t msr; 487 vm_offset_t off; 488 vm_paddr_t pa_base; 489 int i, j; 490 491 bzero(translations, sz); 492 OF_getprop(OF_finddevice("/"), "#address-cells", &acells, 493 sizeof(acells)); 494 if (OF_getprop(mmu, "translations", trans_cells, sz) == -1) 495 panic("moea64_bootstrap: can't get ofw translations"); 496 497 CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); 498 sz /= sizeof(cell_t); 499 for (i = 0, j = 0; i < sz; j++) { 500 translations[j].om_va = trans_cells[i++]; 501 translations[j].om_len = trans_cells[i++]; 502 translations[j].om_pa = trans_cells[i++]; 503 if (acells == 2) { 504 translations[j].om_pa <<= 32; 505 translations[j].om_pa |= trans_cells[i++]; 506 } 507 translations[j].om_mode = trans_cells[i++]; 508 } 509 KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)", 510 i, sz)); 511 512 sz = j; 513 qsort(translations, sz, sizeof (*translations), om_cmp); 514 515 for (i = 0; i < sz; i++) { 516 pa_base = translations[i].om_pa; 517 #ifndef __powerpc64__ 518 if ((translations[i].om_pa >> 32) != 0) 519 panic("OFW translations above 32-bit boundary!"); 520 #endif 521 522 if (pa_base % PAGE_SIZE) 523 panic("OFW translation not page-aligned (phys)!"); 524 if (translations[i].om_va % PAGE_SIZE) 525 panic("OFW translation not page-aligned (virt)!"); 526 527 CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x", 528 pa_base, translations[i].om_va, translations[i].om_len); 529 530 /* Now enter the pages for this mapping */ 531 532 DISABLE_TRANS(msr); 533 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { 534 /* If this address is direct-mapped, skip remapping */ 535 if (hw_direct_map && translations[i].om_va == pa_base && 536 moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M) 537 continue; 538 539 PMAP_LOCK(kernel_pmap); 540 pvo = moea64_pvo_find_va(kernel_pmap, 541 translations[i].om_va + off); 542 PMAP_UNLOCK(kernel_pmap); 543 if (pvo != NULL) 544 continue; 545 546 moea64_kenter(mmup, translations[i].om_va + off, 547 pa_base + off); 548 } 549 ENABLE_TRANS(msr); 550 } 551 } 552 553 #ifdef __powerpc64__ 554 static void 555 moea64_probe_large_page(void) 556 { 557 uint16_t pvr = mfpvr() >> 16; 558 559 switch (pvr) { 560 case IBM970: 561 case IBM970FX: 562 case IBM970MP: 563 powerpc_sync(); isync(); 564 mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); 565 powerpc_sync(); isync(); 566 567 /* FALLTHROUGH */ 568 default: 569 moea64_large_page_size = 0x1000000; /* 16 MB */ 570 moea64_large_page_shift = 24; 571 } 572 573 moea64_large_page_mask = moea64_large_page_size - 1; 574 } 575 576 static void 577 moea64_bootstrap_slb_prefault(vm_offset_t va, int large) 578 { 579 struct slb *cache; 580 struct slb entry; 581 uint64_t esid, slbe; 582 uint64_t i; 583 584 cache = PCPU_GET(slb); 585 esid = va >> ADDR_SR_SHFT; 586 slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; 587 588 for (i = 0; i < 64; i++) { 589 if (cache[i].slbe == (slbe | i)) 590 return; 591 } 592 593 entry.slbe = slbe; 594 entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; 595 if (large) 596 entry.slbv |= SLBV_L; 597 598 slb_insert_kernel(entry.slbe, entry.slbv); 599 } 600 #endif 601 602 static void 603 moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, 604 vm_offset_t kernelend) 605 { 606 struct pvo_entry *pvo; 607 register_t msr; 608 vm_paddr_t pa; 609 vm_offset_t size, off; 610 uint64_t pte_lo; 611 int i; 612 613 if (moea64_large_page_size == 0) 614 hw_direct_map = 0; 615 616 DISABLE_TRANS(msr); 617 if (hw_direct_map) { 618 PMAP_LOCK(kernel_pmap); 619 for (i = 0; i < pregions_sz; i++) { 620 for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + 621 pregions[i].mr_size; pa += moea64_large_page_size) { 622 pte_lo = LPTE_M; 623 624 pvo = alloc_pvo_entry(1 /* bootstrap */); 625 pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE; 626 init_pvo_entry(pvo, kernel_pmap, pa); 627 628 /* 629 * Set memory access as guarded if prefetch within 630 * the page could exit the available physmem area. 631 */ 632 if (pa & moea64_large_page_mask) { 633 pa &= moea64_large_page_mask; 634 pte_lo |= LPTE_G; 635 } 636 if (pa + moea64_large_page_size > 637 pregions[i].mr_start + pregions[i].mr_size) 638 pte_lo |= LPTE_G; 639 640 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | 641 VM_PROT_EXECUTE; 642 pvo->pvo_pte.pa = pa | pte_lo; 643 moea64_pvo_enter(mmup, pvo, NULL); 644 } 645 } 646 PMAP_UNLOCK(kernel_pmap); 647 } else { 648 size = moea64_bpvo_pool_size*sizeof(struct pvo_entry); 649 off = (vm_offset_t)(moea64_bpvo_pool); 650 for (pa = off; pa < off + size; pa += PAGE_SIZE) 651 moea64_kenter(mmup, pa, pa); 652 653 /* 654 * Map certain important things, like ourselves. 655 * 656 * NOTE: We do not map the exception vector space. That code is 657 * used only in real mode, and leaving it unmapped allows us to 658 * catch NULL pointer deferences, instead of making NULL a valid 659 * address. 660 */ 661 662 for (pa = kernelstart & ~PAGE_MASK; pa < kernelend; 663 pa += PAGE_SIZE) 664 moea64_kenter(mmup, pa, pa); 665 } 666 ENABLE_TRANS(msr); 667 668 /* 669 * Allow user to override unmapped_buf_allowed for testing. 670 * XXXKIB Only direct map implementation was tested. 671 */ 672 if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", 673 &unmapped_buf_allowed)) 674 unmapped_buf_allowed = hw_direct_map; 675 } 676 677 void 678 moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 679 { 680 int i, j; 681 vm_size_t physsz, hwphyssz; 682 683 #ifndef __powerpc64__ 684 /* We don't have a direct map since there is no BAT */ 685 hw_direct_map = 0; 686 687 /* Make sure battable is zero, since we have no BAT */ 688 for (i = 0; i < 16; i++) { 689 battable[i].batu = 0; 690 battable[i].batl = 0; 691 } 692 #else 693 moea64_probe_large_page(); 694 695 /* Use a direct map if we have large page support */ 696 if (moea64_large_page_size > 0) 697 hw_direct_map = 1; 698 else 699 hw_direct_map = 0; 700 #endif 701 702 /* Get physical memory regions from firmware */ 703 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); 704 CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); 705 706 if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) 707 panic("moea64_bootstrap: phys_avail too small"); 708 709 phys_avail_count = 0; 710 physsz = 0; 711 hwphyssz = 0; 712 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 713 for (i = 0, j = 0; i < regions_sz; i++, j += 2) { 714 CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)", 715 regions[i].mr_start, regions[i].mr_start + 716 regions[i].mr_size, regions[i].mr_size); 717 if (hwphyssz != 0 && 718 (physsz + regions[i].mr_size) >= hwphyssz) { 719 if (physsz < hwphyssz) { 720 phys_avail[j] = regions[i].mr_start; 721 phys_avail[j + 1] = regions[i].mr_start + 722 hwphyssz - physsz; 723 physsz = hwphyssz; 724 phys_avail_count++; 725 } 726 break; 727 } 728 phys_avail[j] = regions[i].mr_start; 729 phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; 730 phys_avail_count++; 731 physsz += regions[i].mr_size; 732 } 733 734 /* Check for overlap with the kernel and exception vectors */ 735 for (j = 0; j < 2*phys_avail_count; j+=2) { 736 if (phys_avail[j] < EXC_LAST) 737 phys_avail[j] += EXC_LAST; 738 739 if (kernelstart >= phys_avail[j] && 740 kernelstart < phys_avail[j+1]) { 741 if (kernelend < phys_avail[j+1]) { 742 phys_avail[2*phys_avail_count] = 743 (kernelend & ~PAGE_MASK) + PAGE_SIZE; 744 phys_avail[2*phys_avail_count + 1] = 745 phys_avail[j+1]; 746 phys_avail_count++; 747 } 748 749 phys_avail[j+1] = kernelstart & ~PAGE_MASK; 750 } 751 752 if (kernelend >= phys_avail[j] && 753 kernelend < phys_avail[j+1]) { 754 if (kernelstart > phys_avail[j]) { 755 phys_avail[2*phys_avail_count] = phys_avail[j]; 756 phys_avail[2*phys_avail_count + 1] = 757 kernelstart & ~PAGE_MASK; 758 phys_avail_count++; 759 } 760 761 phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; 762 } 763 } 764 765 physmem = btoc(physsz); 766 767 #ifdef PTEGCOUNT 768 moea64_pteg_count = PTEGCOUNT; 769 #else 770 moea64_pteg_count = 0x1000; 771 772 while (moea64_pteg_count < physmem) 773 moea64_pteg_count <<= 1; 774 775 moea64_pteg_count >>= 1; 776 #endif /* PTEGCOUNT */ 777 } 778 779 void 780 moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 781 { 782 int i; 783 784 /* 785 * Set PTEG mask 786 */ 787 moea64_pteg_mask = moea64_pteg_count - 1; 788 789 /* 790 * Initialize SLB table lock and page locks 791 */ 792 mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); 793 for (i = 0; i < PV_LOCK_COUNT; i++) 794 mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF); 795 796 /* 797 * Initialise the bootstrap pvo pool. 798 */ 799 moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( 800 moea64_bpvo_pool_size*sizeof(struct pvo_entry), 0); 801 moea64_bpvo_pool_index = 0; 802 803 /* 804 * Make sure kernel vsid is allocated as well as VSID 0. 805 */ 806 #ifndef __powerpc64__ 807 moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] 808 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); 809 moea64_vsid_bitmap[0] |= 1; 810 #endif 811 812 /* 813 * Initialize the kernel pmap (which is statically allocated). 814 */ 815 #ifdef __powerpc64__ 816 for (i = 0; i < 64; i++) { 817 pcpup->pc_slb[i].slbv = 0; 818 pcpup->pc_slb[i].slbe = 0; 819 } 820 #else 821 for (i = 0; i < 16; i++) 822 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; 823 #endif 824 825 kernel_pmap->pmap_phys = kernel_pmap; 826 CPU_FILL(&kernel_pmap->pm_active); 827 RB_INIT(&kernel_pmap->pmap_pvo); 828 829 PMAP_LOCK_INIT(kernel_pmap); 830 831 /* 832 * Now map in all the other buffers we allocated earlier 833 */ 834 835 moea64_setup_direct_map(mmup, kernelstart, kernelend); 836 } 837 838 void 839 moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 840 { 841 ihandle_t mmui; 842 phandle_t chosen; 843 phandle_t mmu; 844 ssize_t sz; 845 int i; 846 vm_offset_t pa, va; 847 void *dpcpu; 848 849 /* 850 * Set up the Open Firmware pmap and add its mappings if not in real 851 * mode. 852 */ 853 854 chosen = OF_finddevice("/chosen"); 855 if (!ofw_real_mode && chosen != -1 && 856 OF_getprop(chosen, "mmu", &mmui, 4) != -1) { 857 mmu = OF_instance_to_package(mmui); 858 if (mmu == -1 || 859 (sz = OF_getproplen(mmu, "translations")) == -1) 860 sz = 0; 861 if (sz > 6144 /* tmpstksz - 2 KB headroom */) 862 panic("moea64_bootstrap: too many ofw translations"); 863 864 if (sz > 0) 865 moea64_add_ofw_mappings(mmup, mmu, sz); 866 } 867 868 /* 869 * Calculate the last available physical address. 870 */ 871 for (i = 0; phys_avail[i + 2] != 0; i += 2) 872 ; 873 Maxmem = powerpc_btop(phys_avail[i + 1]); 874 875 /* 876 * Initialize MMU and remap early physical mappings 877 */ 878 MMU_CPU_BOOTSTRAP(mmup,0); 879 mtmsr(mfmsr() | PSL_DR | PSL_IR); 880 pmap_bootstrapped++; 881 bs_remap_earlyboot(); 882 883 /* 884 * Set the start and end of kva. 885 */ 886 virtual_avail = VM_MIN_KERNEL_ADDRESS; 887 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; 888 889 /* 890 * Map the entire KVA range into the SLB. We must not fault there. 891 */ 892 #ifdef __powerpc64__ 893 for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) 894 moea64_bootstrap_slb_prefault(va, 0); 895 #endif 896 897 /* 898 * Figure out how far we can extend virtual_end into segment 16 899 * without running into existing mappings. Segment 16 is guaranteed 900 * to contain neither RAM nor devices (at least on Apple hardware), 901 * but will generally contain some OFW mappings we should not 902 * step on. 903 */ 904 905 #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ 906 PMAP_LOCK(kernel_pmap); 907 while (virtual_end < VM_MAX_KERNEL_ADDRESS && 908 moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) 909 virtual_end += PAGE_SIZE; 910 PMAP_UNLOCK(kernel_pmap); 911 #endif 912 913 /* 914 * Allocate a kernel stack with a guard page for thread0 and map it 915 * into the kernel page map. 916 */ 917 pa = moea64_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); 918 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 919 virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; 920 CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); 921 thread0.td_kstack = va; 922 thread0.td_kstack_pages = KSTACK_PAGES; 923 for (i = 0; i < KSTACK_PAGES; i++) { 924 moea64_kenter(mmup, va, pa); 925 pa += PAGE_SIZE; 926 va += PAGE_SIZE; 927 } 928 929 /* 930 * Allocate virtual address space for the message buffer. 931 */ 932 pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); 933 msgbufp = (struct msgbuf *)virtual_avail; 934 va = virtual_avail; 935 virtual_avail += round_page(msgbufsize); 936 while (va < virtual_avail) { 937 moea64_kenter(mmup, va, pa); 938 pa += PAGE_SIZE; 939 va += PAGE_SIZE; 940 } 941 942 /* 943 * Allocate virtual address space for the dynamic percpu area. 944 */ 945 pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); 946 dpcpu = (void *)virtual_avail; 947 va = virtual_avail; 948 virtual_avail += DPCPU_SIZE; 949 while (va < virtual_avail) { 950 moea64_kenter(mmup, va, pa); 951 pa += PAGE_SIZE; 952 va += PAGE_SIZE; 953 } 954 dpcpu_init(dpcpu, 0); 955 956 /* 957 * Allocate some things for page zeroing. We put this directly 958 * in the page table and use MOEA64_PTE_REPLACE to avoid any 959 * of the PVO book-keeping or other parts of the VM system 960 * from even knowing that this hack exists. 961 */ 962 963 if (!hw_direct_map) { 964 mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, 965 MTX_DEF); 966 for (i = 0; i < 2; i++) { 967 moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; 968 virtual_end -= PAGE_SIZE; 969 970 moea64_kenter(mmup, moea64_scratchpage_va[i], 0); 971 972 PMAP_LOCK(kernel_pmap); 973 moea64_scratchpage_pvo[i] = moea64_pvo_find_va( 974 kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); 975 PMAP_UNLOCK(kernel_pmap); 976 } 977 } 978 } 979 980 /* 981 * Activate a user pmap. This mostly involves setting some non-CPU 982 * state. 983 */ 984 void 985 moea64_activate(mmu_t mmu, struct thread *td) 986 { 987 pmap_t pm; 988 989 pm = &td->td_proc->p_vmspace->vm_pmap; 990 CPU_SET(PCPU_GET(cpuid), &pm->pm_active); 991 992 #ifdef __powerpc64__ 993 PCPU_SET(userslb, pm->pm_slb); 994 __asm __volatile("slbmte %0, %1; isync" :: 995 "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); 996 #else 997 PCPU_SET(curpmap, pm->pmap_phys); 998 mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid); 999 #endif 1000 } 1001 1002 void 1003 moea64_deactivate(mmu_t mmu, struct thread *td) 1004 { 1005 pmap_t pm; 1006 1007 __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR)); 1008 1009 pm = &td->td_proc->p_vmspace->vm_pmap; 1010 CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); 1011 #ifdef __powerpc64__ 1012 PCPU_SET(userslb, NULL); 1013 #else 1014 PCPU_SET(curpmap, NULL); 1015 #endif 1016 } 1017 1018 void 1019 moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) 1020 { 1021 struct pvo_entry key, *pvo; 1022 vm_page_t m; 1023 int64_t refchg; 1024 1025 key.pvo_vaddr = sva; 1026 PMAP_LOCK(pm); 1027 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); 1028 pvo != NULL && PVO_VADDR(pvo) < eva; 1029 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { 1030 if ((pvo->pvo_vaddr & PVO_WIRED) == 0) 1031 panic("moea64_unwire: pvo %p is missing PVO_WIRED", 1032 pvo); 1033 pvo->pvo_vaddr &= ~PVO_WIRED; 1034 refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */); 1035 if ((pvo->pvo_vaddr & PVO_MANAGED) && 1036 (pvo->pvo_pte.prot & VM_PROT_WRITE)) { 1037 if (refchg < 0) 1038 refchg = LPTE_CHG; 1039 m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); 1040 1041 refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); 1042 if (refchg & LPTE_CHG) 1043 vm_page_dirty(m); 1044 if (refchg & LPTE_REF) 1045 vm_page_aflag_set(m, PGA_REFERENCED); 1046 } 1047 pm->pm_stats.wired_count--; 1048 } 1049 PMAP_UNLOCK(pm); 1050 } 1051 1052 /* 1053 * This goes through and sets the physical address of our 1054 * special scratch PTE to the PA we want to zero or copy. Because 1055 * of locking issues (this can get called in pvo_enter() by 1056 * the UMA allocator), we can't use most other utility functions here 1057 */ 1058 1059 static __inline 1060 void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_offset_t pa) { 1061 1062 KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); 1063 mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); 1064 1065 moea64_scratchpage_pvo[which]->pvo_pte.pa = 1066 moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; 1067 MOEA64_PTE_REPLACE(mmup, moea64_scratchpage_pvo[which], 1068 MOEA64_PTE_INVALIDATE); 1069 isync(); 1070 } 1071 1072 void 1073 moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) 1074 { 1075 vm_offset_t dst; 1076 vm_offset_t src; 1077 1078 dst = VM_PAGE_TO_PHYS(mdst); 1079 src = VM_PAGE_TO_PHYS(msrc); 1080 1081 if (hw_direct_map) { 1082 bcopy((void *)src, (void *)dst, PAGE_SIZE); 1083 } else { 1084 mtx_lock(&moea64_scratchpage_mtx); 1085 1086 moea64_set_scratchpage_pa(mmu, 0, src); 1087 moea64_set_scratchpage_pa(mmu, 1, dst); 1088 1089 bcopy((void *)moea64_scratchpage_va[0], 1090 (void *)moea64_scratchpage_va[1], PAGE_SIZE); 1091 1092 mtx_unlock(&moea64_scratchpage_mtx); 1093 } 1094 } 1095 1096 static inline void 1097 moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 1098 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 1099 { 1100 void *a_cp, *b_cp; 1101 vm_offset_t a_pg_offset, b_pg_offset; 1102 int cnt; 1103 1104 while (xfersize > 0) { 1105 a_pg_offset = a_offset & PAGE_MASK; 1106 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 1107 a_cp = (char *)VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]) + 1108 a_pg_offset; 1109 b_pg_offset = b_offset & PAGE_MASK; 1110 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 1111 b_cp = (char *)VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]) + 1112 b_pg_offset; 1113 bcopy(a_cp, b_cp, cnt); 1114 a_offset += cnt; 1115 b_offset += cnt; 1116 xfersize -= cnt; 1117 } 1118 } 1119 1120 static inline void 1121 moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 1122 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 1123 { 1124 void *a_cp, *b_cp; 1125 vm_offset_t a_pg_offset, b_pg_offset; 1126 int cnt; 1127 1128 mtx_lock(&moea64_scratchpage_mtx); 1129 while (xfersize > 0) { 1130 a_pg_offset = a_offset & PAGE_MASK; 1131 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 1132 moea64_set_scratchpage_pa(mmu, 0, 1133 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); 1134 a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset; 1135 b_pg_offset = b_offset & PAGE_MASK; 1136 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 1137 moea64_set_scratchpage_pa(mmu, 1, 1138 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); 1139 b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset; 1140 bcopy(a_cp, b_cp, cnt); 1141 a_offset += cnt; 1142 b_offset += cnt; 1143 xfersize -= cnt; 1144 } 1145 mtx_unlock(&moea64_scratchpage_mtx); 1146 } 1147 1148 void 1149 moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 1150 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 1151 { 1152 1153 if (hw_direct_map) { 1154 moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset, 1155 xfersize); 1156 } else { 1157 moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset, 1158 xfersize); 1159 } 1160 } 1161 1162 void 1163 moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 1164 { 1165 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1166 1167 if (size + off > PAGE_SIZE) 1168 panic("moea64_zero_page: size + off > PAGE_SIZE"); 1169 1170 if (hw_direct_map) { 1171 bzero((caddr_t)pa + off, size); 1172 } else { 1173 mtx_lock(&moea64_scratchpage_mtx); 1174 moea64_set_scratchpage_pa(mmu, 0, pa); 1175 bzero((caddr_t)moea64_scratchpage_va[0] + off, size); 1176 mtx_unlock(&moea64_scratchpage_mtx); 1177 } 1178 } 1179 1180 /* 1181 * Zero a page of physical memory by temporarily mapping it 1182 */ 1183 void 1184 moea64_zero_page(mmu_t mmu, vm_page_t m) 1185 { 1186 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1187 vm_offset_t va, off; 1188 1189 if (!hw_direct_map) { 1190 mtx_lock(&moea64_scratchpage_mtx); 1191 1192 moea64_set_scratchpage_pa(mmu, 0, pa); 1193 va = moea64_scratchpage_va[0]; 1194 } else { 1195 va = pa; 1196 } 1197 1198 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 1199 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 1200 1201 if (!hw_direct_map) 1202 mtx_unlock(&moea64_scratchpage_mtx); 1203 } 1204 1205 void 1206 moea64_zero_page_idle(mmu_t mmu, vm_page_t m) 1207 { 1208 1209 moea64_zero_page(mmu, m); 1210 } 1211 1212 /* 1213 * Map the given physical page at the specified virtual address in the 1214 * target pmap with the protection requested. If specified the page 1215 * will be wired down. 1216 */ 1217 1218 int 1219 moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1220 vm_prot_t prot, u_int flags, int8_t psind) 1221 { 1222 struct pvo_entry *pvo, *oldpvo; 1223 struct pvo_head *pvo_head; 1224 uint64_t pte_lo; 1225 int error; 1226 1227 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 1228 VM_OBJECT_ASSERT_LOCKED(m->object); 1229 1230 pvo = alloc_pvo_entry(0); 1231 pvo->pvo_pmap = NULL; /* to be filled in later */ 1232 pvo->pvo_pte.prot = prot; 1233 1234 pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); 1235 pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; 1236 1237 if ((flags & PMAP_ENTER_WIRED) != 0) 1238 pvo->pvo_vaddr |= PVO_WIRED; 1239 1240 if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) { 1241 pvo_head = NULL; 1242 } else { 1243 pvo_head = &m->md.mdpg_pvoh; 1244 pvo->pvo_vaddr |= PVO_MANAGED; 1245 } 1246 1247 for (;;) { 1248 PV_PAGE_LOCK(m); 1249 PMAP_LOCK(pmap); 1250 if (pvo->pvo_pmap == NULL) 1251 init_pvo_entry(pvo, pmap, va); 1252 if (prot & VM_PROT_WRITE) 1253 if (pmap_bootstrapped && 1254 (m->oflags & VPO_UNMANAGED) == 0) 1255 vm_page_aflag_set(m, PGA_WRITEABLE); 1256 1257 oldpvo = moea64_pvo_find_va(pmap, va); 1258 if (oldpvo != NULL) { 1259 if (oldpvo->pvo_vaddr == pvo->pvo_vaddr && 1260 oldpvo->pvo_pte.pa == pvo->pvo_pte.pa && 1261 oldpvo->pvo_pte.prot == prot) { 1262 /* Identical mapping already exists */ 1263 error = 0; 1264 1265 /* If not in page table, reinsert it */ 1266 if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) { 1267 moea64_pte_overflow--; 1268 MOEA64_PTE_INSERT(mmu, oldpvo); 1269 } 1270 1271 /* Then just clean up and go home */ 1272 PV_PAGE_UNLOCK(m); 1273 PMAP_UNLOCK(pmap); 1274 free_pvo_entry(pvo); 1275 break; 1276 } 1277 1278 /* Otherwise, need to kill it first */ 1279 KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old " 1280 "mapping does not match new mapping")); 1281 moea64_pvo_remove_from_pmap(mmu, oldpvo); 1282 } 1283 error = moea64_pvo_enter(mmu, pvo, pvo_head); 1284 PV_PAGE_UNLOCK(m); 1285 PMAP_UNLOCK(pmap); 1286 1287 /* Free any dead pages */ 1288 if (oldpvo != NULL) { 1289 PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1290 moea64_pvo_remove_from_page(mmu, oldpvo); 1291 PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1292 free_pvo_entry(oldpvo); 1293 } 1294 1295 if (error != ENOMEM) 1296 break; 1297 if ((flags & PMAP_ENTER_NOSLEEP) != 0) 1298 return (KERN_RESOURCE_SHORTAGE); 1299 VM_OBJECT_ASSERT_UNLOCKED(m->object); 1300 VM_WAIT; 1301 } 1302 1303 /* 1304 * Flush the page from the instruction cache if this page is 1305 * mapped executable and cacheable. 1306 */ 1307 if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && 1308 (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { 1309 vm_page_aflag_set(m, PGA_EXECUTABLE); 1310 moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); 1311 } 1312 return (KERN_SUCCESS); 1313 } 1314 1315 static void 1316 moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t pa, 1317 vm_size_t sz) 1318 { 1319 1320 /* 1321 * This is much trickier than on older systems because 1322 * we can't sync the icache on physical addresses directly 1323 * without a direct map. Instead we check a couple of cases 1324 * where the memory is already mapped in and, failing that, 1325 * use the same trick we use for page zeroing to create 1326 * a temporary mapping for this physical address. 1327 */ 1328 1329 if (!pmap_bootstrapped) { 1330 /* 1331 * If PMAP is not bootstrapped, we are likely to be 1332 * in real mode. 1333 */ 1334 __syncicache((void *)pa, sz); 1335 } else if (pmap == kernel_pmap) { 1336 __syncicache((void *)va, sz); 1337 } else if (hw_direct_map) { 1338 __syncicache((void *)pa, sz); 1339 } else { 1340 /* Use the scratch page to set up a temp mapping */ 1341 1342 mtx_lock(&moea64_scratchpage_mtx); 1343 1344 moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); 1345 __syncicache((void *)(moea64_scratchpage_va[1] + 1346 (va & ADDR_POFF)), sz); 1347 1348 mtx_unlock(&moea64_scratchpage_mtx); 1349 } 1350 } 1351 1352 /* 1353 * Maps a sequence of resident pages belonging to the same object. 1354 * The sequence begins with the given page m_start. This page is 1355 * mapped at the given virtual address start. Each subsequent page is 1356 * mapped at a virtual address that is offset from start by the same 1357 * amount as the page is offset from m_start within the object. The 1358 * last page in the sequence is the page with the largest offset from 1359 * m_start that can be mapped at a virtual address less than the given 1360 * virtual address end. Not every virtual page between start and end 1361 * is mapped; only those for which a resident page exists with the 1362 * corresponding offset from m_start are mapped. 1363 */ 1364 void 1365 moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, 1366 vm_page_t m_start, vm_prot_t prot) 1367 { 1368 vm_page_t m; 1369 vm_pindex_t diff, psize; 1370 1371 VM_OBJECT_ASSERT_LOCKED(m_start->object); 1372 1373 psize = atop(end - start); 1374 m = m_start; 1375 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1376 moea64_enter(mmu, pm, start + ptoa(diff), m, prot & 1377 (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); 1378 m = TAILQ_NEXT(m, listq); 1379 } 1380 } 1381 1382 void 1383 moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, 1384 vm_prot_t prot) 1385 { 1386 1387 moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), 1388 PMAP_ENTER_NOSLEEP, 0); 1389 } 1390 1391 vm_paddr_t 1392 moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) 1393 { 1394 struct pvo_entry *pvo; 1395 vm_paddr_t pa; 1396 1397 PMAP_LOCK(pm); 1398 pvo = moea64_pvo_find_va(pm, va); 1399 if (pvo == NULL) 1400 pa = 0; 1401 else 1402 pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); 1403 PMAP_UNLOCK(pm); 1404 1405 return (pa); 1406 } 1407 1408 /* 1409 * Atomically extract and hold the physical page with the given 1410 * pmap and virtual address pair if that mapping permits the given 1411 * protection. 1412 */ 1413 vm_page_t 1414 moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1415 { 1416 struct pvo_entry *pvo; 1417 vm_page_t m; 1418 vm_paddr_t pa; 1419 1420 m = NULL; 1421 pa = 0; 1422 PMAP_LOCK(pmap); 1423 retry: 1424 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); 1425 if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { 1426 if (vm_page_pa_tryrelock(pmap, 1427 pvo->pvo_pte.pa & LPTE_RPGN, &pa)) 1428 goto retry; 1429 m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); 1430 vm_page_hold(m); 1431 } 1432 PA_UNLOCK_COND(pa); 1433 PMAP_UNLOCK(pmap); 1434 return (m); 1435 } 1436 1437 static mmu_t installed_mmu; 1438 1439 static void * 1440 moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, 1441 int wait) 1442 { 1443 struct pvo_entry *pvo; 1444 vm_offset_t va; 1445 vm_page_t m; 1446 int pflags, needed_lock; 1447 1448 /* 1449 * This entire routine is a horrible hack to avoid bothering kmem 1450 * for new KVA addresses. Because this can get called from inside 1451 * kmem allocation routines, calling kmem for a new address here 1452 * can lead to multiply locking non-recursive mutexes. 1453 */ 1454 1455 *flags = UMA_SLAB_PRIV; 1456 needed_lock = !PMAP_LOCKED(kernel_pmap); 1457 pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; 1458 1459 for (;;) { 1460 m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); 1461 if (m == NULL) { 1462 if (wait & M_NOWAIT) 1463 return (NULL); 1464 VM_WAIT; 1465 } else 1466 break; 1467 } 1468 1469 va = VM_PAGE_TO_PHYS(m); 1470 1471 pvo = alloc_pvo_entry(1 /* bootstrap */); 1472 1473 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE; 1474 pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M; 1475 1476 if (needed_lock) 1477 PMAP_LOCK(kernel_pmap); 1478 1479 init_pvo_entry(pvo, kernel_pmap, va); 1480 pvo->pvo_vaddr |= PVO_WIRED; 1481 1482 moea64_pvo_enter(installed_mmu, pvo, NULL); 1483 1484 if (needed_lock) 1485 PMAP_UNLOCK(kernel_pmap); 1486 1487 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) 1488 bzero((void *)va, PAGE_SIZE); 1489 1490 return (void *)va; 1491 } 1492 1493 extern int elf32_nxstack; 1494 1495 void 1496 moea64_init(mmu_t mmu) 1497 { 1498 1499 CTR0(KTR_PMAP, "moea64_init"); 1500 1501 moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), 1502 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1503 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1504 1505 if (!hw_direct_map) { 1506 installed_mmu = mmu; 1507 uma_zone_set_allocf(moea64_pvo_zone,moea64_uma_page_alloc); 1508 } 1509 1510 #ifdef COMPAT_FREEBSD32 1511 elf32_nxstack = 1; 1512 #endif 1513 1514 moea64_initialized = TRUE; 1515 } 1516 1517 boolean_t 1518 moea64_is_referenced(mmu_t mmu, vm_page_t m) 1519 { 1520 1521 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1522 ("moea64_is_referenced: page %p is not managed", m)); 1523 1524 return (moea64_query_bit(mmu, m, LPTE_REF)); 1525 } 1526 1527 boolean_t 1528 moea64_is_modified(mmu_t mmu, vm_page_t m) 1529 { 1530 1531 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1532 ("moea64_is_modified: page %p is not managed", m)); 1533 1534 /* 1535 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 1536 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 1537 * is clear, no PTEs can have LPTE_CHG set. 1538 */ 1539 VM_OBJECT_ASSERT_LOCKED(m->object); 1540 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 1541 return (FALSE); 1542 return (moea64_query_bit(mmu, m, LPTE_CHG)); 1543 } 1544 1545 boolean_t 1546 moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1547 { 1548 struct pvo_entry *pvo; 1549 boolean_t rv = TRUE; 1550 1551 PMAP_LOCK(pmap); 1552 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); 1553 if (pvo != NULL) 1554 rv = FALSE; 1555 PMAP_UNLOCK(pmap); 1556 return (rv); 1557 } 1558 1559 void 1560 moea64_clear_modify(mmu_t mmu, vm_page_t m) 1561 { 1562 1563 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1564 ("moea64_clear_modify: page %p is not managed", m)); 1565 VM_OBJECT_ASSERT_WLOCKED(m->object); 1566 KASSERT(!vm_page_xbusied(m), 1567 ("moea64_clear_modify: page %p is exclusive busied", m)); 1568 1569 /* 1570 * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG 1571 * set. If the object containing the page is locked and the page is 1572 * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 1573 */ 1574 if ((m->aflags & PGA_WRITEABLE) == 0) 1575 return; 1576 moea64_clear_bit(mmu, m, LPTE_CHG); 1577 } 1578 1579 /* 1580 * Clear the write and modified bits in each of the given page's mappings. 1581 */ 1582 void 1583 moea64_remove_write(mmu_t mmu, vm_page_t m) 1584 { 1585 struct pvo_entry *pvo; 1586 int64_t refchg, ret; 1587 pmap_t pmap; 1588 1589 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1590 ("moea64_remove_write: page %p is not managed", m)); 1591 1592 /* 1593 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 1594 * set by another thread while the object is locked. Thus, 1595 * if PGA_WRITEABLE is clear, no page table entries need updating. 1596 */ 1597 VM_OBJECT_ASSERT_WLOCKED(m->object); 1598 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 1599 return; 1600 powerpc_sync(); 1601 PV_PAGE_LOCK(m); 1602 refchg = 0; 1603 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1604 pmap = pvo->pvo_pmap; 1605 PMAP_LOCK(pmap); 1606 if (!(pvo->pvo_vaddr & PVO_DEAD) && 1607 (pvo->pvo_pte.prot & VM_PROT_WRITE)) { 1608 pvo->pvo_pte.prot &= ~VM_PROT_WRITE; 1609 ret = MOEA64_PTE_REPLACE(mmu, pvo, 1610 MOEA64_PTE_PROT_UPDATE); 1611 if (ret < 0) 1612 ret = LPTE_CHG; 1613 refchg |= ret; 1614 if (pvo->pvo_pmap == kernel_pmap) 1615 isync(); 1616 } 1617 PMAP_UNLOCK(pmap); 1618 } 1619 if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG) 1620 vm_page_dirty(m); 1621 vm_page_aflag_clear(m, PGA_WRITEABLE); 1622 PV_PAGE_UNLOCK(m); 1623 } 1624 1625 /* 1626 * moea64_ts_referenced: 1627 * 1628 * Return a count of reference bits for a page, clearing those bits. 1629 * It is not necessary for every reference bit to be cleared, but it 1630 * is necessary that 0 only be returned when there are truly no 1631 * reference bits set. 1632 * 1633 * XXX: The exact number of bits to check and clear is a matter that 1634 * should be tested and standardized at some point in the future for 1635 * optimal aging of shared pages. 1636 */ 1637 int 1638 moea64_ts_referenced(mmu_t mmu, vm_page_t m) 1639 { 1640 1641 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1642 ("moea64_ts_referenced: page %p is not managed", m)); 1643 return (moea64_clear_bit(mmu, m, LPTE_REF)); 1644 } 1645 1646 /* 1647 * Modify the WIMG settings of all mappings for a page. 1648 */ 1649 void 1650 moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) 1651 { 1652 struct pvo_entry *pvo; 1653 int64_t refchg; 1654 pmap_t pmap; 1655 uint64_t lo; 1656 1657 if ((m->oflags & VPO_UNMANAGED) != 0) { 1658 m->md.mdpg_cache_attrs = ma; 1659 return; 1660 } 1661 1662 lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); 1663 1664 PV_PAGE_LOCK(m); 1665 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1666 pmap = pvo->pvo_pmap; 1667 PMAP_LOCK(pmap); 1668 if (!(pvo->pvo_vaddr & PVO_DEAD)) { 1669 pvo->pvo_pte.pa &= ~LPTE_WIMG; 1670 pvo->pvo_pte.pa |= lo; 1671 refchg = MOEA64_PTE_REPLACE(mmu, pvo, 1672 MOEA64_PTE_INVALIDATE); 1673 if (refchg < 0) 1674 refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ? 1675 LPTE_CHG : 0; 1676 if ((pvo->pvo_vaddr & PVO_MANAGED) && 1677 (pvo->pvo_pte.prot & VM_PROT_WRITE)) { 1678 refchg |= 1679 atomic_readandclear_32(&m->md.mdpg_attrs); 1680 if (refchg & LPTE_CHG) 1681 vm_page_dirty(m); 1682 if (refchg & LPTE_REF) 1683 vm_page_aflag_set(m, PGA_REFERENCED); 1684 } 1685 if (pvo->pvo_pmap == kernel_pmap) 1686 isync(); 1687 } 1688 PMAP_UNLOCK(pmap); 1689 } 1690 m->md.mdpg_cache_attrs = ma; 1691 PV_PAGE_UNLOCK(m); 1692 } 1693 1694 /* 1695 * Map a wired page into kernel virtual address space. 1696 */ 1697 void 1698 moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) 1699 { 1700 int error; 1701 struct pvo_entry *pvo, *oldpvo; 1702 1703 pvo = alloc_pvo_entry(0); 1704 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 1705 pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma); 1706 pvo->pvo_vaddr |= PVO_WIRED; 1707 1708 PMAP_LOCK(kernel_pmap); 1709 oldpvo = moea64_pvo_find_va(kernel_pmap, va); 1710 if (oldpvo != NULL) 1711 moea64_pvo_remove_from_pmap(mmu, oldpvo); 1712 init_pvo_entry(pvo, kernel_pmap, va); 1713 error = moea64_pvo_enter(mmu, pvo, NULL); 1714 PMAP_UNLOCK(kernel_pmap); 1715 1716 /* Free any dead pages */ 1717 if (oldpvo != NULL) { 1718 PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1719 moea64_pvo_remove_from_page(mmu, oldpvo); 1720 PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1721 free_pvo_entry(oldpvo); 1722 } 1723 1724 if (error != 0 && error != ENOENT) 1725 panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, 1726 pa, error); 1727 } 1728 1729 void 1730 moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 1731 { 1732 1733 moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 1734 } 1735 1736 /* 1737 * Extract the physical page address associated with the given kernel virtual 1738 * address. 1739 */ 1740 vm_paddr_t 1741 moea64_kextract(mmu_t mmu, vm_offset_t va) 1742 { 1743 struct pvo_entry *pvo; 1744 vm_paddr_t pa; 1745 1746 /* 1747 * Shortcut the direct-mapped case when applicable. We never put 1748 * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. 1749 */ 1750 if (va < VM_MIN_KERNEL_ADDRESS) 1751 return (va); 1752 1753 PMAP_LOCK(kernel_pmap); 1754 pvo = moea64_pvo_find_va(kernel_pmap, va); 1755 KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, 1756 va)); 1757 pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); 1758 PMAP_UNLOCK(kernel_pmap); 1759 return (pa); 1760 } 1761 1762 /* 1763 * Remove a wired page from kernel virtual address space. 1764 */ 1765 void 1766 moea64_kremove(mmu_t mmu, vm_offset_t va) 1767 { 1768 moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); 1769 } 1770 1771 /* 1772 * Map a range of physical addresses into kernel virtual address space. 1773 * 1774 * The value passed in *virt is a suggested virtual address for the mapping. 1775 * Architectures which can support a direct-mapped physical to virtual region 1776 * can return the appropriate address within that region, leaving '*virt' 1777 * unchanged. Other architectures should map the pages starting at '*virt' and 1778 * update '*virt' with the first usable address after the mapped region. 1779 */ 1780 vm_offset_t 1781 moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 1782 vm_paddr_t pa_end, int prot) 1783 { 1784 vm_offset_t sva, va; 1785 1786 if (hw_direct_map) { 1787 /* 1788 * Check if every page in the region is covered by the direct 1789 * map. The direct map covers all of physical memory. Use 1790 * moea64_calc_wimg() as a shortcut to see if the page is in 1791 * physical memory as a way to see if the direct map covers it. 1792 */ 1793 for (va = pa_start; va < pa_end; va += PAGE_SIZE) 1794 if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M) 1795 break; 1796 if (va == pa_end) 1797 return (pa_start); 1798 } 1799 sva = *virt; 1800 va = sva; 1801 /* XXX respect prot argument */ 1802 for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) 1803 moea64_kenter(mmu, va, pa_start); 1804 *virt = va; 1805 1806 return (sva); 1807 } 1808 1809 /* 1810 * Returns true if the pmap's pv is one of the first 1811 * 16 pvs linked to from this page. This count may 1812 * be changed upwards or downwards in the future; it 1813 * is only necessary that true be returned for a small 1814 * subset of pmaps for proper page aging. 1815 */ 1816 boolean_t 1817 moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) 1818 { 1819 int loops; 1820 struct pvo_entry *pvo; 1821 boolean_t rv; 1822 1823 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1824 ("moea64_page_exists_quick: page %p is not managed", m)); 1825 loops = 0; 1826 rv = FALSE; 1827 PV_PAGE_LOCK(m); 1828 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1829 if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) { 1830 rv = TRUE; 1831 break; 1832 } 1833 if (++loops >= 16) 1834 break; 1835 } 1836 PV_PAGE_UNLOCK(m); 1837 return (rv); 1838 } 1839 1840 /* 1841 * Return the number of managed mappings to the given physical page 1842 * that are wired. 1843 */ 1844 int 1845 moea64_page_wired_mappings(mmu_t mmu, vm_page_t m) 1846 { 1847 struct pvo_entry *pvo; 1848 int count; 1849 1850 count = 0; 1851 if ((m->oflags & VPO_UNMANAGED) != 0) 1852 return (count); 1853 PV_PAGE_LOCK(m); 1854 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) 1855 if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED) 1856 count++; 1857 PV_PAGE_UNLOCK(m); 1858 return (count); 1859 } 1860 1861 static uintptr_t moea64_vsidcontext; 1862 1863 uintptr_t 1864 moea64_get_unique_vsid(void) { 1865 u_int entropy; 1866 register_t hash; 1867 uint32_t mask; 1868 int i; 1869 1870 entropy = 0; 1871 __asm __volatile("mftb %0" : "=r"(entropy)); 1872 1873 mtx_lock(&moea64_slb_mutex); 1874 for (i = 0; i < NVSIDS; i += VSID_NBPW) { 1875 u_int n; 1876 1877 /* 1878 * Create a new value by mutiplying by a prime and adding in 1879 * entropy from the timebase register. This is to make the 1880 * VSID more random so that the PT hash function collides 1881 * less often. (Note that the prime casues gcc to do shifts 1882 * instead of a multiply.) 1883 */ 1884 moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy; 1885 hash = moea64_vsidcontext & (NVSIDS - 1); 1886 if (hash == 0) /* 0 is special, avoid it */ 1887 continue; 1888 n = hash >> 5; 1889 mask = 1 << (hash & (VSID_NBPW - 1)); 1890 hash = (moea64_vsidcontext & VSID_HASHMASK); 1891 if (moea64_vsid_bitmap[n] & mask) { /* collision? */ 1892 /* anything free in this bucket? */ 1893 if (moea64_vsid_bitmap[n] == 0xffffffff) { 1894 entropy = (moea64_vsidcontext >> 20); 1895 continue; 1896 } 1897 i = ffs(~moea64_vsid_bitmap[n]) - 1; 1898 mask = 1 << i; 1899 hash &= VSID_HASHMASK & ~(VSID_NBPW - 1); 1900 hash |= i; 1901 } 1902 if (hash == VSID_VRMA) /* also special, avoid this too */ 1903 continue; 1904 KASSERT(!(moea64_vsid_bitmap[n] & mask), 1905 ("Allocating in-use VSID %#zx\n", hash)); 1906 moea64_vsid_bitmap[n] |= mask; 1907 mtx_unlock(&moea64_slb_mutex); 1908 return (hash); 1909 } 1910 1911 mtx_unlock(&moea64_slb_mutex); 1912 panic("%s: out of segments",__func__); 1913 } 1914 1915 #ifdef __powerpc64__ 1916 void 1917 moea64_pinit(mmu_t mmu, pmap_t pmap) 1918 { 1919 1920 RB_INIT(&pmap->pmap_pvo); 1921 1922 pmap->pm_slb_tree_root = slb_alloc_tree(); 1923 pmap->pm_slb = slb_alloc_user_cache(); 1924 pmap->pm_slb_len = 0; 1925 } 1926 #else 1927 void 1928 moea64_pinit(mmu_t mmu, pmap_t pmap) 1929 { 1930 int i; 1931 uint32_t hash; 1932 1933 RB_INIT(&pmap->pmap_pvo); 1934 1935 if (pmap_bootstrapped) 1936 pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, 1937 (vm_offset_t)pmap); 1938 else 1939 pmap->pmap_phys = pmap; 1940 1941 /* 1942 * Allocate some segment registers for this pmap. 1943 */ 1944 hash = moea64_get_unique_vsid(); 1945 1946 for (i = 0; i < 16; i++) 1947 pmap->pm_sr[i] = VSID_MAKE(i, hash); 1948 1949 KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); 1950 } 1951 #endif 1952 1953 /* 1954 * Initialize the pmap associated with process 0. 1955 */ 1956 void 1957 moea64_pinit0(mmu_t mmu, pmap_t pm) 1958 { 1959 1960 PMAP_LOCK_INIT(pm); 1961 moea64_pinit(mmu, pm); 1962 bzero(&pm->pm_stats, sizeof(pm->pm_stats)); 1963 } 1964 1965 /* 1966 * Set the physical protection on the specified range of this map as requested. 1967 */ 1968 static void 1969 moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) 1970 { 1971 struct vm_page *pg; 1972 vm_prot_t oldprot; 1973 int32_t refchg; 1974 1975 PMAP_LOCK_ASSERT(pm, MA_OWNED); 1976 1977 /* 1978 * Change the protection of the page. 1979 */ 1980 oldprot = pvo->pvo_pte.prot; 1981 pvo->pvo_pte.prot = prot; 1982 pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); 1983 1984 /* 1985 * If the PVO is in the page table, update mapping 1986 */ 1987 refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE); 1988 if (refchg < 0) 1989 refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0; 1990 1991 if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) && 1992 (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { 1993 if ((pg->oflags & VPO_UNMANAGED) == 0) 1994 vm_page_aflag_set(pg, PGA_EXECUTABLE); 1995 moea64_syncicache(mmu, pm, PVO_VADDR(pvo), 1996 pvo->pvo_pte.pa & LPTE_RPGN, PAGE_SIZE); 1997 } 1998 1999 /* 2000 * Update vm about the REF/CHG bits if the page is managed and we have 2001 * removed write access. 2002 */ 2003 if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) && 2004 (oldprot & VM_PROT_WRITE)) { 2005 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); 2006 if (refchg & LPTE_CHG) 2007 vm_page_dirty(pg); 2008 if (refchg & LPTE_REF) 2009 vm_page_aflag_set(pg, PGA_REFERENCED); 2010 } 2011 } 2012 2013 void 2014 moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, 2015 vm_prot_t prot) 2016 { 2017 struct pvo_entry *pvo, *tpvo, key; 2018 2019 CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, 2020 sva, eva, prot); 2021 2022 KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, 2023 ("moea64_protect: non current pmap")); 2024 2025 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2026 moea64_remove(mmu, pm, sva, eva); 2027 return; 2028 } 2029 2030 PMAP_LOCK(pm); 2031 key.pvo_vaddr = sva; 2032 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); 2033 pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { 2034 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); 2035 moea64_pvo_protect(mmu, pm, pvo, prot); 2036 } 2037 PMAP_UNLOCK(pm); 2038 } 2039 2040 /* 2041 * Map a list of wired pages into kernel virtual address space. This is 2042 * intended for temporary mappings which do not need page modification or 2043 * references recorded. Existing mappings in the region are overwritten. 2044 */ 2045 void 2046 moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count) 2047 { 2048 while (count-- > 0) { 2049 moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); 2050 va += PAGE_SIZE; 2051 m++; 2052 } 2053 } 2054 2055 /* 2056 * Remove page mappings from kernel virtual address space. Intended for 2057 * temporary mappings entered by moea64_qenter. 2058 */ 2059 void 2060 moea64_qremove(mmu_t mmu, vm_offset_t va, int count) 2061 { 2062 while (count-- > 0) { 2063 moea64_kremove(mmu, va); 2064 va += PAGE_SIZE; 2065 } 2066 } 2067 2068 void 2069 moea64_release_vsid(uint64_t vsid) 2070 { 2071 int idx, mask; 2072 2073 mtx_lock(&moea64_slb_mutex); 2074 idx = vsid & (NVSIDS-1); 2075 mask = 1 << (idx % VSID_NBPW); 2076 idx /= VSID_NBPW; 2077 KASSERT(moea64_vsid_bitmap[idx] & mask, 2078 ("Freeing unallocated VSID %#jx", vsid)); 2079 moea64_vsid_bitmap[idx] &= ~mask; 2080 mtx_unlock(&moea64_slb_mutex); 2081 } 2082 2083 2084 void 2085 moea64_release(mmu_t mmu, pmap_t pmap) 2086 { 2087 2088 /* 2089 * Free segment registers' VSIDs 2090 */ 2091 #ifdef __powerpc64__ 2092 slb_free_tree(pmap); 2093 slb_free_user_cache(pmap->pm_slb); 2094 #else 2095 KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0")); 2096 2097 moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0])); 2098 #endif 2099 } 2100 2101 /* 2102 * Remove all pages mapped by the specified pmap 2103 */ 2104 void 2105 moea64_remove_pages(mmu_t mmu, pmap_t pm) 2106 { 2107 struct pvo_entry *pvo, *tpvo; 2108 struct pvo_tree tofree; 2109 2110 RB_INIT(&tofree); 2111 2112 PMAP_LOCK(pm); 2113 RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) { 2114 if (pvo->pvo_vaddr & PVO_WIRED) 2115 continue; 2116 2117 /* 2118 * For locking reasons, remove this from the page table and 2119 * pmap, but save delinking from the vm_page for a second 2120 * pass 2121 */ 2122 moea64_pvo_remove_from_pmap(mmu, pvo); 2123 RB_INSERT(pvo_tree, &tofree, pvo); 2124 } 2125 PMAP_UNLOCK(pm); 2126 2127 RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { 2128 PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); 2129 moea64_pvo_remove_from_page(mmu, pvo); 2130 PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); 2131 RB_REMOVE(pvo_tree, &tofree, pvo); 2132 free_pvo_entry(pvo); 2133 } 2134 } 2135 2136 /* 2137 * Remove the given range of addresses from the specified map. 2138 */ 2139 void 2140 moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) 2141 { 2142 struct pvo_entry *pvo, *tpvo, key; 2143 struct pvo_tree tofree; 2144 2145 /* 2146 * Perform an unsynchronized read. This is, however, safe. 2147 */ 2148 if (pm->pm_stats.resident_count == 0) 2149 return; 2150 2151 key.pvo_vaddr = sva; 2152 2153 RB_INIT(&tofree); 2154 2155 PMAP_LOCK(pm); 2156 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); 2157 pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { 2158 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); 2159 2160 /* 2161 * For locking reasons, remove this from the page table and 2162 * pmap, but save delinking from the vm_page for a second 2163 * pass 2164 */ 2165 moea64_pvo_remove_from_pmap(mmu, pvo); 2166 RB_INSERT(pvo_tree, &tofree, pvo); 2167 } 2168 PMAP_UNLOCK(pm); 2169 2170 RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) { 2171 PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN); 2172 moea64_pvo_remove_from_page(mmu, pvo); 2173 PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN); 2174 RB_REMOVE(pvo_tree, &tofree, pvo); 2175 free_pvo_entry(pvo); 2176 } 2177 } 2178 2179 /* 2180 * Remove physical page from all pmaps in which it resides. moea64_pvo_remove() 2181 * will reflect changes in pte's back to the vm_page. 2182 */ 2183 void 2184 moea64_remove_all(mmu_t mmu, vm_page_t m) 2185 { 2186 struct pvo_entry *pvo, *next_pvo; 2187 struct pvo_head freequeue; 2188 int wasdead; 2189 pmap_t pmap; 2190 2191 LIST_INIT(&freequeue); 2192 2193 PV_PAGE_LOCK(m); 2194 LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) { 2195 pmap = pvo->pvo_pmap; 2196 PMAP_LOCK(pmap); 2197 wasdead = (pvo->pvo_vaddr & PVO_DEAD); 2198 if (!wasdead) 2199 moea64_pvo_remove_from_pmap(mmu, pvo); 2200 moea64_pvo_remove_from_page(mmu, pvo); 2201 if (!wasdead) 2202 LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink); 2203 PMAP_UNLOCK(pmap); 2204 2205 } 2206 KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings")); 2207 KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable")); 2208 PV_PAGE_UNLOCK(m); 2209 2210 /* Clean up UMA allocations */ 2211 LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo) 2212 free_pvo_entry(pvo); 2213 } 2214 2215 /* 2216 * Allocate a physical page of memory directly from the phys_avail map. 2217 * Can only be called from moea64_bootstrap before avail start and end are 2218 * calculated. 2219 */ 2220 vm_offset_t 2221 moea64_bootstrap_alloc(vm_size_t size, u_int align) 2222 { 2223 vm_offset_t s, e; 2224 int i, j; 2225 2226 size = round_page(size); 2227 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 2228 if (align != 0) 2229 s = (phys_avail[i] + align - 1) & ~(align - 1); 2230 else 2231 s = phys_avail[i]; 2232 e = s + size; 2233 2234 if (s < phys_avail[i] || e > phys_avail[i + 1]) 2235 continue; 2236 2237 if (s + size > platform_real_maxaddr()) 2238 continue; 2239 2240 if (s == phys_avail[i]) { 2241 phys_avail[i] += size; 2242 } else if (e == phys_avail[i + 1]) { 2243 phys_avail[i + 1] -= size; 2244 } else { 2245 for (j = phys_avail_count * 2; j > i; j -= 2) { 2246 phys_avail[j] = phys_avail[j - 2]; 2247 phys_avail[j + 1] = phys_avail[j - 1]; 2248 } 2249 2250 phys_avail[i + 3] = phys_avail[i + 1]; 2251 phys_avail[i + 1] = s; 2252 phys_avail[i + 2] = e; 2253 phys_avail_count++; 2254 } 2255 2256 return (s); 2257 } 2258 panic("moea64_bootstrap_alloc: could not allocate memory"); 2259 } 2260 2261 static int 2262 moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head) 2263 { 2264 int first, err; 2265 2266 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); 2267 KASSERT(moea64_pvo_find_va(pvo->pvo_pmap, PVO_VADDR(pvo)) == NULL, 2268 ("Existing mapping for VA %#jx", (uintmax_t)PVO_VADDR(pvo))); 2269 2270 moea64_pvo_enter_calls++; 2271 2272 /* 2273 * Add to pmap list 2274 */ 2275 RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); 2276 2277 /* 2278 * Remember if the list was empty and therefore will be the first 2279 * item. 2280 */ 2281 if (pvo_head != NULL) { 2282 if (LIST_FIRST(pvo_head) == NULL) 2283 first = 1; 2284 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); 2285 } 2286 2287 if (pvo->pvo_vaddr & PVO_WIRED) 2288 pvo->pvo_pmap->pm_stats.wired_count++; 2289 pvo->pvo_pmap->pm_stats.resident_count++; 2290 2291 /* 2292 * Insert it into the hardware page table 2293 */ 2294 err = MOEA64_PTE_INSERT(mmu, pvo); 2295 if (err != 0) { 2296 panic("moea64_pvo_enter: overflow"); 2297 } 2298 2299 moea64_pvo_entries++; 2300 2301 if (pvo->pvo_pmap == kernel_pmap) 2302 isync(); 2303 2304 #ifdef __powerpc64__ 2305 /* 2306 * Make sure all our bootstrap mappings are in the SLB as soon 2307 * as virtual memory is switched on. 2308 */ 2309 if (!pmap_bootstrapped) 2310 moea64_bootstrap_slb_prefault(PVO_VADDR(pvo), 2311 pvo->pvo_vaddr & PVO_LARGE); 2312 #endif 2313 2314 return (first ? ENOENT : 0); 2315 } 2316 2317 static void 2318 moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo) 2319 { 2320 struct vm_page *pg; 2321 int32_t refchg; 2322 2323 KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap")); 2324 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); 2325 KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO")); 2326 2327 /* 2328 * If there is an active pte entry, we need to deactivate it 2329 */ 2330 refchg = MOEA64_PTE_UNSET(mmu, pvo); 2331 if (refchg < 0) { 2332 /* 2333 * If it was evicted from the page table, be pessimistic and 2334 * dirty the page. 2335 */ 2336 if (pvo->pvo_pte.prot & VM_PROT_WRITE) 2337 refchg = LPTE_CHG; 2338 else 2339 refchg = 0; 2340 } 2341 2342 /* 2343 * Update our statistics. 2344 */ 2345 pvo->pvo_pmap->pm_stats.resident_count--; 2346 if (pvo->pvo_vaddr & PVO_WIRED) 2347 pvo->pvo_pmap->pm_stats.wired_count--; 2348 2349 /* 2350 * Remove this PVO from the pmap list. 2351 */ 2352 RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); 2353 2354 /* 2355 * Mark this for the next sweep 2356 */ 2357 pvo->pvo_vaddr |= PVO_DEAD; 2358 2359 /* Send RC bits to VM */ 2360 if ((pvo->pvo_vaddr & PVO_MANAGED) && 2361 (pvo->pvo_pte.prot & VM_PROT_WRITE)) { 2362 pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); 2363 if (pg != NULL) { 2364 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs); 2365 if (refchg & LPTE_CHG) 2366 vm_page_dirty(pg); 2367 if (refchg & LPTE_REF) 2368 vm_page_aflag_set(pg, PGA_REFERENCED); 2369 } 2370 } 2371 } 2372 2373 static void 2374 moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo) 2375 { 2376 struct vm_page *pg; 2377 2378 KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page")); 2379 2380 /* Use NULL pmaps as a sentinel for races in page deletion */ 2381 if (pvo->pvo_pmap == NULL) 2382 return; 2383 pvo->pvo_pmap = NULL; 2384 2385 /* 2386 * Update vm about page writeability/executability if managed 2387 */ 2388 PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN); 2389 pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); 2390 2391 if ((pvo->pvo_vaddr & PVO_MANAGED) && pg != NULL) { 2392 LIST_REMOVE(pvo, pvo_vlink); 2393 if (LIST_EMPTY(vm_page_to_pvoh(pg))) 2394 vm_page_aflag_clear(pg, PGA_WRITEABLE | PGA_EXECUTABLE); 2395 } 2396 2397 moea64_pvo_entries--; 2398 moea64_pvo_remove_calls++; 2399 } 2400 2401 static struct pvo_entry * 2402 moea64_pvo_find_va(pmap_t pm, vm_offset_t va) 2403 { 2404 struct pvo_entry key; 2405 2406 PMAP_LOCK_ASSERT(pm, MA_OWNED); 2407 2408 key.pvo_vaddr = va & ~ADDR_POFF; 2409 return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key)); 2410 } 2411 2412 static boolean_t 2413 moea64_query_bit(mmu_t mmu, vm_page_t m, uint64_t ptebit) 2414 { 2415 struct pvo_entry *pvo; 2416 int64_t ret; 2417 boolean_t rv; 2418 2419 /* 2420 * See if this bit is stored in the page already. 2421 */ 2422 if (m->md.mdpg_attrs & ptebit) 2423 return (TRUE); 2424 2425 /* 2426 * Examine each PTE. Sync so that any pending REF/CHG bits are 2427 * flushed to the PTEs. 2428 */ 2429 rv = FALSE; 2430 powerpc_sync(); 2431 PV_PAGE_LOCK(m); 2432 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2433 ret = 0; 2434 2435 /* 2436 * See if this pvo has a valid PTE. if so, fetch the 2437 * REF/CHG bits from the valid PTE. If the appropriate 2438 * ptebit is set, return success. 2439 */ 2440 PMAP_LOCK(pvo->pvo_pmap); 2441 if (!(pvo->pvo_vaddr & PVO_DEAD)) 2442 ret = MOEA64_PTE_SYNCH(mmu, pvo); 2443 PMAP_UNLOCK(pvo->pvo_pmap); 2444 2445 if (ret > 0) { 2446 atomic_set_32(&m->md.mdpg_attrs, 2447 ret & (LPTE_CHG | LPTE_REF)); 2448 if (ret & ptebit) { 2449 rv = TRUE; 2450 break; 2451 } 2452 } 2453 } 2454 PV_PAGE_UNLOCK(m); 2455 2456 return (rv); 2457 } 2458 2459 static u_int 2460 moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) 2461 { 2462 u_int count; 2463 struct pvo_entry *pvo; 2464 int64_t ret; 2465 2466 /* 2467 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so 2468 * we can reset the right ones). 2469 */ 2470 powerpc_sync(); 2471 2472 /* 2473 * For each pvo entry, clear the pte's ptebit. 2474 */ 2475 count = 0; 2476 PV_PAGE_LOCK(m); 2477 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 2478 ret = 0; 2479 2480 PMAP_LOCK(pvo->pvo_pmap); 2481 if (!(pvo->pvo_vaddr & PVO_DEAD)) 2482 ret = MOEA64_PTE_CLEAR(mmu, pvo, ptebit); 2483 PMAP_UNLOCK(pvo->pvo_pmap); 2484 2485 if (ret > 0 && (ret & ptebit)) 2486 count++; 2487 } 2488 atomic_clear_32(&m->md.mdpg_attrs, ptebit); 2489 PV_PAGE_UNLOCK(m); 2490 2491 return (count); 2492 } 2493 2494 boolean_t 2495 moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 2496 { 2497 struct pvo_entry *pvo, key; 2498 vm_offset_t ppa; 2499 int error = 0; 2500 2501 PMAP_LOCK(kernel_pmap); 2502 key.pvo_vaddr = ppa = pa & ~ADDR_POFF; 2503 for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key); 2504 ppa < pa + size; ppa += PAGE_SIZE, 2505 pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) { 2506 if (pvo == NULL || (pvo->pvo_pte.pa & LPTE_RPGN) != ppa) { 2507 error = EFAULT; 2508 break; 2509 } 2510 } 2511 PMAP_UNLOCK(kernel_pmap); 2512 2513 return (error); 2514 } 2515 2516 /* 2517 * Map a set of physical memory pages into the kernel virtual 2518 * address space. Return a pointer to where it is mapped. This 2519 * routine is intended to be used for mapping device memory, 2520 * NOT real memory. 2521 */ 2522 void * 2523 moea64_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) 2524 { 2525 vm_offset_t va, tmpva, ppa, offset; 2526 2527 ppa = trunc_page(pa); 2528 offset = pa & PAGE_MASK; 2529 size = roundup2(offset + size, PAGE_SIZE); 2530 2531 va = kva_alloc(size); 2532 2533 if (!va) 2534 panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); 2535 2536 for (tmpva = va; size > 0;) { 2537 moea64_kenter_attr(mmu, tmpva, ppa, ma); 2538 size -= PAGE_SIZE; 2539 tmpva += PAGE_SIZE; 2540 ppa += PAGE_SIZE; 2541 } 2542 2543 return ((void *)(va + offset)); 2544 } 2545 2546 void * 2547 moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) 2548 { 2549 2550 return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); 2551 } 2552 2553 void 2554 moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) 2555 { 2556 vm_offset_t base, offset; 2557 2558 base = trunc_page(va); 2559 offset = va & PAGE_MASK; 2560 size = roundup2(offset + size, PAGE_SIZE); 2561 2562 kva_free(base, size); 2563 } 2564 2565 void 2566 moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) 2567 { 2568 struct pvo_entry *pvo; 2569 vm_offset_t lim; 2570 vm_paddr_t pa; 2571 vm_size_t len; 2572 2573 PMAP_LOCK(pm); 2574 while (sz > 0) { 2575 lim = round_page(va); 2576 len = MIN(lim - va, sz); 2577 pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); 2578 if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) { 2579 pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va & ADDR_POFF); 2580 moea64_syncicache(mmu, pm, va, pa, len); 2581 } 2582 va += len; 2583 sz -= len; 2584 } 2585 PMAP_UNLOCK(pm); 2586 } 2587 2588 void 2589 moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va) 2590 { 2591 2592 *va = (void *)pa; 2593 } 2594 2595 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1]; 2596 2597 void 2598 moea64_scan_init(mmu_t mmu) 2599 { 2600 struct pvo_entry *pvo; 2601 vm_offset_t va; 2602 int i; 2603 2604 if (!do_minidump) { 2605 /* Initialize phys. segments for dumpsys(). */ 2606 memset(&dump_map, 0, sizeof(dump_map)); 2607 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); 2608 for (i = 0; i < pregions_sz; i++) { 2609 dump_map[i].pa_start = pregions[i].mr_start; 2610 dump_map[i].pa_size = pregions[i].mr_size; 2611 } 2612 return; 2613 } 2614 2615 /* Virtual segments for minidumps: */ 2616 memset(&dump_map, 0, sizeof(dump_map)); 2617 2618 /* 1st: kernel .data and .bss. */ 2619 dump_map[0].pa_start = trunc_page((uintptr_t)_etext); 2620 dump_map[0].pa_size = round_page((uintptr_t)_end) - 2621 dump_map[0].pa_start; 2622 2623 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */ 2624 dump_map[1].pa_start = (vm_paddr_t)msgbufp->msg_ptr; 2625 dump_map[1].pa_size = round_page(msgbufp->msg_size); 2626 2627 /* 3rd: kernel VM. */ 2628 va = dump_map[1].pa_start + dump_map[1].pa_size; 2629 /* Find start of next chunk (from va). */ 2630 while (va < virtual_end) { 2631 /* Don't dump the buffer cache. */ 2632 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) { 2633 va = kmi.buffer_eva; 2634 continue; 2635 } 2636 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); 2637 if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) 2638 break; 2639 va += PAGE_SIZE; 2640 } 2641 if (va < virtual_end) { 2642 dump_map[2].pa_start = va; 2643 va += PAGE_SIZE; 2644 /* Find last page in chunk. */ 2645 while (va < virtual_end) { 2646 /* Don't run into the buffer cache. */ 2647 if (va == kmi.buffer_sva) 2648 break; 2649 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF); 2650 if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD)) 2651 break; 2652 va += PAGE_SIZE; 2653 } 2654 dump_map[2].pa_size = va - dump_map[2].pa_start; 2655 } 2656 } 2657 2658