1 /*- 2 * Copyright (c) 2008-2015 Nathan Whitehorn 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 /* 31 * Manages physical address maps. 32 * 33 * Since the information managed by this module is also stored by the 34 * logical address mapping module, this module may throw away valid virtual 35 * to physical mappings at almost any time. However, invalidations of 36 * mappings must be done as requested. 37 * 38 * In order to cope with hardware architectures which make virtual to 39 * physical map invalidates expensive, this module may delay invalidate 40 * reduced protection operations until such time as they are actually 41 * necessary. This module is given full information as to which processors 42 * are currently using which maps, and to when physical maps must be made 43 * correct. 
44 */ 45 46 #include "opt_compat.h" 47 #include "opt_kstack_pages.h" 48 49 #include <sys/param.h> 50 #include <sys/kernel.h> 51 #include <sys/conf.h> 52 #include <sys/queue.h> 53 #include <sys/cpuset.h> 54 #include <sys/kerneldump.h> 55 #include <sys/ktr.h> 56 #include <sys/lock.h> 57 #include <sys/msgbuf.h> 58 #include <sys/malloc.h> 59 #include <sys/mutex.h> 60 #include <sys/proc.h> 61 #include <sys/rwlock.h> 62 #include <sys/sched.h> 63 #include <sys/sysctl.h> 64 #include <sys/systm.h> 65 #include <sys/vmmeter.h> 66 67 #include <sys/kdb.h> 68 69 #include <dev/ofw/openfirm.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_param.h> 73 #include <vm/vm_kern.h> 74 #include <vm/vm_page.h> 75 #include <vm/vm_map.h> 76 #include <vm/vm_object.h> 77 #include <vm/vm_extern.h> 78 #include <vm/vm_pageout.h> 79 #include <vm/uma.h> 80 81 #include <machine/_inttypes.h> 82 #include <machine/cpu.h> 83 #include <machine/platform.h> 84 #include <machine/frame.h> 85 #include <machine/md_var.h> 86 #include <machine/psl.h> 87 #include <machine/bat.h> 88 #include <machine/hid.h> 89 #include <machine/pte.h> 90 #include <machine/sr.h> 91 #include <machine/trap.h> 92 #include <machine/mmuvar.h> 93 94 #include "mmu_oea64.h" 95 #include "mmu_if.h" 96 #include "moea64_if.h" 97 98 void moea64_release_vsid(uint64_t vsid); 99 uintptr_t moea64_get_unique_vsid(void); 100 101 #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) 102 #define ENABLE_TRANS(msr) mtmsr(msr) 103 104 #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) 105 #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) 106 #define VSID_HASH_MASK 0x0000007fffffffffULL 107 108 /* 109 * Locking semantics: 110 * 111 * There are two locks of interest: the page locks and the pmap locks, which 112 * protect their individual PVO lists and are locked in that order. The contents 113 * of all PVO entries are protected by the locks of their respective pmaps. 114 * The pmap of any PVO is guaranteed not to change so long as the PVO is linked 115 * into any list. 116 * 117 */ 118 119 #define PV_LOCK_COUNT PA_LOCK_COUNT*3 120 static struct mtx_padalign pv_lock[PV_LOCK_COUNT]; 121 122 #define PV_LOCKPTR(pa) ((struct mtx *)(&pv_lock[pa_index(pa) % PV_LOCK_COUNT])) 123 #define PV_LOCK(pa) mtx_lock(PV_LOCKPTR(pa)) 124 #define PV_UNLOCK(pa) mtx_unlock(PV_LOCKPTR(pa)) 125 #define PV_LOCKASSERT(pa) mtx_assert(PV_LOCKPTR(pa), MA_OWNED) 126 #define PV_PAGE_LOCK(m) PV_LOCK(VM_PAGE_TO_PHYS(m)) 127 #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) 128 #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) 129 130 struct ofw_map { 131 cell_t om_va; 132 cell_t om_len; 133 uint64_t om_pa; 134 cell_t om_mode; 135 }; 136 137 extern unsigned char _etext[]; 138 extern unsigned char _end[]; 139 140 extern int ofw_real_mode; 141 142 /* 143 * Map of physical memory regions. 144 */ 145 static struct mem_region *regions; 146 static struct mem_region *pregions; 147 static u_int phys_avail_count; 148 static int regions_sz, pregions_sz; 149 150 extern void bs_remap_earlyboot(void); 151 152 /* 153 * Lock for the SLB tables. 154 */ 155 struct mtx moea64_slb_mutex; 156 157 /* 158 * PTEG data. 159 */ 160 u_int moea64_pteg_count; 161 u_int moea64_pteg_mask; 162 163 /* 164 * PVO data. 
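 * PVO entries are normally allocated from moea64_pvo_zone; before the pmap
 * layer is initialized, and for explicit bootstrap allocations, they come
 * from the statically sized moea64_bpvo_pool instead.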
165 */ 166 167 uma_zone_t moea64_pvo_zone; /* zone for pvo entries */ 168 169 static struct pvo_entry *moea64_bpvo_pool; 170 static int moea64_bpvo_pool_index = 0; 171 static int moea64_bpvo_pool_size = 327680; 172 TUNABLE_INT("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size); 173 SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD, 174 &moea64_bpvo_pool_index, 0, ""); 175 176 #define VSID_NBPW (sizeof(u_int32_t) * 8) 177 #ifdef __powerpc64__ 178 #define NVSIDS (NPMAPS * 16) 179 #define VSID_HASHMASK 0xffffffffUL 180 #else 181 #define NVSIDS NPMAPS 182 #define VSID_HASHMASK 0xfffffUL 183 #endif 184 static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; 185 186 static boolean_t moea64_initialized = FALSE; 187 188 /* 189 * Statistics. 190 */ 191 u_int moea64_pte_valid = 0; 192 u_int moea64_pte_overflow = 0; 193 u_int moea64_pvo_entries = 0; 194 u_int moea64_pvo_enter_calls = 0; 195 u_int moea64_pvo_remove_calls = 0; 196 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, 197 &moea64_pte_valid, 0, ""); 198 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, 199 &moea64_pte_overflow, 0, ""); 200 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, 201 &moea64_pvo_entries, 0, ""); 202 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, 203 &moea64_pvo_enter_calls, 0, ""); 204 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, 205 &moea64_pvo_remove_calls, 0, ""); 206 207 vm_offset_t moea64_scratchpage_va[2]; 208 struct pvo_entry *moea64_scratchpage_pvo[2]; 209 struct mtx moea64_scratchpage_mtx; 210 211 uint64_t moea64_large_page_mask = 0; 212 uint64_t moea64_large_page_size = 0; 213 int moea64_large_page_shift = 0; 214 215 /* 216 * PVO calls. 217 */ 218 static int moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, 219 struct pvo_head *pvo_head); 220 static void moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo); 221 static void moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo); 222 static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); 223 224 /* 225 * Utility routines. 
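 * moea64_query_bit() and moea64_clear_bit() test and clear the REF/CHG
 * bits for a page's mappings; moea64_syncicache() keeps the instruction
 * cache coherent for executable mappings.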
226 */ 227 static boolean_t moea64_query_bit(mmu_t, vm_page_t, uint64_t); 228 static u_int moea64_clear_bit(mmu_t, vm_page_t, uint64_t); 229 static void moea64_kremove(mmu_t, vm_offset_t); 230 static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, 231 vm_offset_t pa, vm_size_t sz); 232 233 /* 234 * Kernel MMU interface 235 */ 236 void moea64_clear_modify(mmu_t, vm_page_t); 237 void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); 238 void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 239 vm_page_t *mb, vm_offset_t b_offset, int xfersize); 240 int moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, 241 u_int flags, int8_t psind); 242 void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, 243 vm_prot_t); 244 void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); 245 vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); 246 vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); 247 void moea64_init(mmu_t); 248 boolean_t moea64_is_modified(mmu_t, vm_page_t); 249 boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 250 boolean_t moea64_is_referenced(mmu_t, vm_page_t); 251 int moea64_ts_referenced(mmu_t, vm_page_t); 252 vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); 253 boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); 254 int moea64_page_wired_mappings(mmu_t, vm_page_t); 255 void moea64_pinit(mmu_t, pmap_t); 256 void moea64_pinit0(mmu_t, pmap_t); 257 void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); 258 void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 259 void moea64_qremove(mmu_t, vm_offset_t, int); 260 void moea64_release(mmu_t, pmap_t); 261 void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 262 void moea64_remove_pages(mmu_t, pmap_t); 263 void moea64_remove_all(mmu_t, vm_page_t); 264 void moea64_remove_write(mmu_t, vm_page_t); 265 void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 266 void moea64_zero_page(mmu_t, vm_page_t); 267 void moea64_zero_page_area(mmu_t, vm_page_t, int, int); 268 void moea64_zero_page_idle(mmu_t, vm_page_t); 269 void moea64_activate(mmu_t, struct thread *); 270 void moea64_deactivate(mmu_t, struct thread *); 271 void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); 272 void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t); 273 void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); 274 vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); 275 void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); 276 void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma); 277 void moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); 278 boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); 279 static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); 280 void moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, 281 void **va); 282 void moea64_scan_init(mmu_t mmu); 283 284 static mmu_method_t moea64_methods[] = { 285 MMUMETHOD(mmu_clear_modify, moea64_clear_modify), 286 MMUMETHOD(mmu_copy_page, moea64_copy_page), 287 MMUMETHOD(mmu_copy_pages, moea64_copy_pages), 288 MMUMETHOD(mmu_enter, moea64_enter), 289 MMUMETHOD(mmu_enter_object, moea64_enter_object), 290 MMUMETHOD(mmu_enter_quick, moea64_enter_quick), 291 MMUMETHOD(mmu_extract, moea64_extract), 292 MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), 293 MMUMETHOD(mmu_init, moea64_init), 294 MMUMETHOD(mmu_is_modified, moea64_is_modified), 295 
MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), 296 MMUMETHOD(mmu_is_referenced, moea64_is_referenced), 297 MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), 298 MMUMETHOD(mmu_map, moea64_map), 299 MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), 300 MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), 301 MMUMETHOD(mmu_pinit, moea64_pinit), 302 MMUMETHOD(mmu_pinit0, moea64_pinit0), 303 MMUMETHOD(mmu_protect, moea64_protect), 304 MMUMETHOD(mmu_qenter, moea64_qenter), 305 MMUMETHOD(mmu_qremove, moea64_qremove), 306 MMUMETHOD(mmu_release, moea64_release), 307 MMUMETHOD(mmu_remove, moea64_remove), 308 MMUMETHOD(mmu_remove_pages, moea64_remove_pages), 309 MMUMETHOD(mmu_remove_all, moea64_remove_all), 310 MMUMETHOD(mmu_remove_write, moea64_remove_write), 311 MMUMETHOD(mmu_sync_icache, moea64_sync_icache), 312 MMUMETHOD(mmu_unwire, moea64_unwire), 313 MMUMETHOD(mmu_zero_page, moea64_zero_page), 314 MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), 315 MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), 316 MMUMETHOD(mmu_activate, moea64_activate), 317 MMUMETHOD(mmu_deactivate, moea64_deactivate), 318 MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), 319 320 /* Internal interfaces */ 321 MMUMETHOD(mmu_mapdev, moea64_mapdev), 322 MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), 323 MMUMETHOD(mmu_unmapdev, moea64_unmapdev), 324 MMUMETHOD(mmu_kextract, moea64_kextract), 325 MMUMETHOD(mmu_kenter, moea64_kenter), 326 MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), 327 MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), 328 MMUMETHOD(mmu_scan_init, moea64_scan_init), 329 MMUMETHOD(mmu_dumpsys_map, moea64_dumpsys_map), 330 331 { 0, 0 } 332 }; 333 334 MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); 335 336 static struct pvo_head * 337 vm_page_to_pvoh(vm_page_t m) 338 { 339 340 mtx_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), MA_OWNED); 341 return (&m->md.mdpg_pvoh); 342 } 343 344 static struct pvo_entry * 345 alloc_pvo_entry(int bootstrap) 346 { 347 struct pvo_entry *pvo; 348 349 if (!moea64_initialized || bootstrap) { 350 if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) { 351 panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", 352 moea64_bpvo_pool_index, moea64_bpvo_pool_size, 353 moea64_bpvo_pool_size * sizeof(struct pvo_entry)); 354 } 355 pvo = &moea64_bpvo_pool[ 356 atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)]; 357 bzero(pvo, sizeof(*pvo)); 358 pvo->pvo_vaddr = PVO_BOOTSTRAP; 359 } else { 360 pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT); 361 bzero(pvo, sizeof(*pvo)); 362 } 363 364 return (pvo); 365 } 366 367 368 static void 369 init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va) 370 { 371 uint64_t vsid; 372 uint64_t hash; 373 int shift; 374 375 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 376 377 pvo->pvo_pmap = pmap; 378 va &= ~ADDR_POFF; 379 pvo->pvo_vaddr |= va; 380 vsid = va_to_vsid(pmap, va); 381 pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) 382 | (vsid << 16); 383 384 shift = (pvo->pvo_vaddr & PVO_LARGE) ? 
moea64_large_page_shift : 385 ADDR_PIDX_SHFT; 386 hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); 387 pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; 388 } 389 390 static void 391 free_pvo_entry(struct pvo_entry *pvo) 392 { 393 394 if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) 395 uma_zfree(moea64_pvo_zone, pvo); 396 } 397 398 void 399 moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte) 400 { 401 402 lpte->pte_hi = (pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) & 403 LPTE_AVPN_MASK; 404 lpte->pte_hi |= LPTE_VALID; 405 406 if (pvo->pvo_vaddr & PVO_LARGE) 407 lpte->pte_hi |= LPTE_BIG; 408 if (pvo->pvo_vaddr & PVO_WIRED) 409 lpte->pte_hi |= LPTE_WIRED; 410 if (pvo->pvo_vaddr & PVO_HID) 411 lpte->pte_hi |= LPTE_HID; 412 413 lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */ 414 if (pvo->pvo_pte.prot & VM_PROT_WRITE) 415 lpte->pte_lo |= LPTE_BW; 416 else 417 lpte->pte_lo |= LPTE_BR; 418 419 if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE)) 420 lpte->pte_lo |= LPTE_NOEXEC; 421 } 422 423 static __inline uint64_t 424 moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma) 425 { 426 uint64_t pte_lo; 427 int i; 428 429 if (ma != VM_MEMATTR_DEFAULT) { 430 switch (ma) { 431 case VM_MEMATTR_UNCACHEABLE: 432 return (LPTE_I | LPTE_G); 433 case VM_MEMATTR_WRITE_COMBINING: 434 case VM_MEMATTR_WRITE_BACK: 435 case VM_MEMATTR_PREFETCHABLE: 436 return (LPTE_I); 437 case VM_MEMATTR_WRITE_THROUGH: 438 return (LPTE_W | LPTE_M); 439 } 440 } 441 442 /* 443 * Assume the page is cache inhibited and access is guarded unless 444 * it's in our available memory array. 445 */ 446 pte_lo = LPTE_I | LPTE_G; 447 for (i = 0; i < pregions_sz; i++) { 448 if ((pa >= pregions[i].mr_start) && 449 (pa < (pregions[i].mr_start + pregions[i].mr_size))) { 450 pte_lo &= ~(LPTE_I | LPTE_G); 451 pte_lo |= LPTE_M; 452 break; 453 } 454 } 455 456 return pte_lo; 457 } 458 459 /* 460 * Quick sort callout for comparing memory regions. 
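 * om_cmp() orders the firmware translation entries by physical address
 * before they are entered into the kernel pmap.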
461 */ 462 static int om_cmp(const void *a, const void *b); 463 464 static int 465 om_cmp(const void *a, const void *b) 466 { 467 const struct ofw_map *mapa; 468 const struct ofw_map *mapb; 469 470 mapa = a; 471 mapb = b; 472 if (mapa->om_pa < mapb->om_pa) 473 return (-1); 474 else if (mapa->om_pa > mapb->om_pa) 475 return (1); 476 else 477 return (0); 478 } 479 480 static void 481 moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) 482 { 483 struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */ 484 pcell_t acells, trans_cells[sz/sizeof(cell_t)]; 485 struct pvo_entry *pvo; 486 register_t msr; 487 vm_offset_t off; 488 vm_paddr_t pa_base; 489 int i, j; 490 491 bzero(translations, sz); 492 OF_getprop(OF_finddevice("/"), "#address-cells", &acells, 493 sizeof(acells)); 494 if (OF_getprop(mmu, "translations", trans_cells, sz) == -1) 495 panic("moea64_bootstrap: can't get ofw translations"); 496 497 CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); 498 sz /= sizeof(cell_t); 499 for (i = 0, j = 0; i < sz; j++) { 500 translations[j].om_va = trans_cells[i++]; 501 translations[j].om_len = trans_cells[i++]; 502 translations[j].om_pa = trans_cells[i++]; 503 if (acells == 2) { 504 translations[j].om_pa <<= 32; 505 translations[j].om_pa |= trans_cells[i++]; 506 } 507 translations[j].om_mode = trans_cells[i++]; 508 } 509 KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)", 510 i, sz)); 511 512 sz = j; 513 qsort(translations, sz, sizeof (*translations), om_cmp); 514 515 for (i = 0; i < sz; i++) { 516 pa_base = translations[i].om_pa; 517 #ifndef __powerpc64__ 518 if ((translations[i].om_pa >> 32) != 0) 519 panic("OFW translations above 32-bit boundary!"); 520 #endif 521 522 if (pa_base % PAGE_SIZE) 523 panic("OFW translation not page-aligned (phys)!"); 524 if (translations[i].om_va % PAGE_SIZE) 525 panic("OFW translation not page-aligned (virt)!"); 526 527 CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x", 528 pa_base, translations[i].om_va, translations[i].om_len); 529 530 /* Now enter the pages for this mapping */ 531 532 DISABLE_TRANS(msr); 533 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { 534 /* If this address is direct-mapped, skip remapping */ 535 if (hw_direct_map && translations[i].om_va == pa_base && 536 moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M) 537 continue; 538 539 PMAP_LOCK(kernel_pmap); 540 pvo = moea64_pvo_find_va(kernel_pmap, 541 translations[i].om_va + off); 542 PMAP_UNLOCK(kernel_pmap); 543 if (pvo != NULL) 544 continue; 545 546 moea64_kenter(mmup, translations[i].om_va + off, 547 pa_base + off); 548 } 549 ENABLE_TRANS(msr); 550 } 551 } 552 553 #ifdef __powerpc64__ 554 static void 555 moea64_probe_large_page(void) 556 { 557 uint16_t pvr = mfpvr() >> 16; 558 559 switch (pvr) { 560 case IBM970: 561 case IBM970FX: 562 case IBM970MP: 563 powerpc_sync(); isync(); 564 mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); 565 powerpc_sync(); isync(); 566 567 /* FALLTHROUGH */ 568 default: 569 moea64_large_page_size = 0x1000000; /* 16 MB */ 570 moea64_large_page_shift = 24; 571 } 572 573 moea64_large_page_mask = moea64_large_page_size - 1; 574 } 575 576 static void 577 moea64_bootstrap_slb_prefault(vm_offset_t va, int large) 578 { 579 struct slb *cache; 580 struct slb entry; 581 uint64_t esid, slbe; 582 uint64_t i; 583 584 cache = PCPU_GET(slb); 585 esid = va >> ADDR_SR_SHFT; 586 slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; 587 588 for (i = 0; i < 64; i++) { 589 if (cache[i].slbe == (slbe | i)) 
			return;
	}

	entry.slbe = slbe;
	entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT;
	if (large)
		entry.slbv |= SLBV_L;

	slb_insert_kernel(entry.slbe, entry.slbv);
}
#endif

static void
moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
    vm_offset_t kernelend)
{
	struct pvo_entry *pvo;
	register_t msr;
	vm_paddr_t pa;
	vm_offset_t size, off;
	uint64_t pte_lo;
	int i;

	if (moea64_large_page_size == 0)
		hw_direct_map = 0;

	DISABLE_TRANS(msr);
	if (hw_direct_map) {
		PMAP_LOCK(kernel_pmap);
		for (i = 0; i < pregions_sz; i++) {
			for (pa = pregions[i].mr_start;
			    pa < pregions[i].mr_start + pregions[i].mr_size;
			    pa += moea64_large_page_size) {
				pte_lo = LPTE_M;

				pvo = alloc_pvo_entry(1 /* bootstrap */);
				pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE;
				init_pvo_entry(pvo, kernel_pmap, pa);

				/*
				 * Set memory access as guarded if prefetch
				 * within the page could exit the available
				 * physmem area.
				 */
				if (pa & moea64_large_page_mask) {
					pa &= moea64_large_page_mask;
					pte_lo |= LPTE_G;
				}
				if (pa + moea64_large_page_size >
				    pregions[i].mr_start + pregions[i].mr_size)
					pte_lo |= LPTE_G;

				pvo->pvo_pte.prot = VM_PROT_READ |
				    VM_PROT_WRITE | VM_PROT_EXECUTE;
				pvo->pvo_pte.pa = pa | pte_lo;
				moea64_pvo_enter(mmup, pvo, NULL);
			}
		}
		PMAP_UNLOCK(kernel_pmap);
	} else {
		size = moea64_bpvo_pool_size * sizeof(struct pvo_entry);
		off = (vm_offset_t)(moea64_bpvo_pool);
		for (pa = off; pa < off + size; pa += PAGE_SIZE)
			moea64_kenter(mmup, pa, pa);

		/*
		 * Map certain important things, like ourselves.
		 *
		 * NOTE: We do not map the exception vector space. That code
		 * is used only in real mode, and leaving it unmapped allows
		 * us to catch NULL pointer dereferences, instead of making
		 * NULL a valid address.
		 */

		for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
		    pa += PAGE_SIZE)
			moea64_kenter(mmup, pa, pa);
	}
	ENABLE_TRANS(msr);

	/*
	 * Allow user to override unmapped_buf_allowed for testing.
	 * XXXKIB Only direct map implementation was tested.
671 */ 672 if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", 673 &unmapped_buf_allowed)) 674 unmapped_buf_allowed = hw_direct_map; 675 } 676 677 void 678 moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 679 { 680 int i, j; 681 vm_size_t physsz, hwphyssz; 682 683 #ifndef __powerpc64__ 684 /* We don't have a direct map since there is no BAT */ 685 hw_direct_map = 0; 686 687 /* Make sure battable is zero, since we have no BAT */ 688 for (i = 0; i < 16; i++) { 689 battable[i].batu = 0; 690 battable[i].batl = 0; 691 } 692 #else 693 moea64_probe_large_page(); 694 695 /* Use a direct map if we have large page support */ 696 if (moea64_large_page_size > 0) 697 hw_direct_map = 1; 698 else 699 hw_direct_map = 0; 700 #endif 701 702 /* Get physical memory regions from firmware */ 703 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); 704 CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); 705 706 if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) 707 panic("moea64_bootstrap: phys_avail too small"); 708 709 phys_avail_count = 0; 710 physsz = 0; 711 hwphyssz = 0; 712 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); 713 for (i = 0, j = 0; i < regions_sz; i++, j += 2) { 714 CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)", 715 regions[i].mr_start, regions[i].mr_start + 716 regions[i].mr_size, regions[i].mr_size); 717 if (hwphyssz != 0 && 718 (physsz + regions[i].mr_size) >= hwphyssz) { 719 if (physsz < hwphyssz) { 720 phys_avail[j] = regions[i].mr_start; 721 phys_avail[j + 1] = regions[i].mr_start + 722 hwphyssz - physsz; 723 physsz = hwphyssz; 724 phys_avail_count++; 725 } 726 break; 727 } 728 phys_avail[j] = regions[i].mr_start; 729 phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; 730 phys_avail_count++; 731 physsz += regions[i].mr_size; 732 } 733 734 /* Check for overlap with the kernel and exception vectors */ 735 for (j = 0; j < 2*phys_avail_count; j+=2) { 736 if (phys_avail[j] < EXC_LAST) 737 phys_avail[j] += EXC_LAST; 738 739 if (kernelstart >= phys_avail[j] && 740 kernelstart < phys_avail[j+1]) { 741 if (kernelend < phys_avail[j+1]) { 742 phys_avail[2*phys_avail_count] = 743 (kernelend & ~PAGE_MASK) + PAGE_SIZE; 744 phys_avail[2*phys_avail_count + 1] = 745 phys_avail[j+1]; 746 phys_avail_count++; 747 } 748 749 phys_avail[j+1] = kernelstart & ~PAGE_MASK; 750 } 751 752 if (kernelend >= phys_avail[j] && 753 kernelend < phys_avail[j+1]) { 754 if (kernelstart > phys_avail[j]) { 755 phys_avail[2*phys_avail_count] = phys_avail[j]; 756 phys_avail[2*phys_avail_count + 1] = 757 kernelstart & ~PAGE_MASK; 758 phys_avail_count++; 759 } 760 761 phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; 762 } 763 } 764 765 physmem = btoc(physsz); 766 767 #ifdef PTEGCOUNT 768 moea64_pteg_count = PTEGCOUNT; 769 #else 770 moea64_pteg_count = 0x1000; 771 772 while (moea64_pteg_count < physmem) 773 moea64_pteg_count <<= 1; 774 775 moea64_pteg_count >>= 1; 776 #endif /* PTEGCOUNT */ 777 } 778 779 void 780 moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 781 { 782 int i; 783 784 /* 785 * Set PTEG mask 786 */ 787 moea64_pteg_mask = moea64_pteg_count - 1; 788 789 /* 790 * Initialize SLB table lock and page locks 791 */ 792 mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); 793 for (i = 0; i < PV_LOCK_COUNT; i++) 794 mtx_init(&pv_lock[i], "page pv", NULL, MTX_DEF); 795 796 /* 797 * Initialise the bootstrap pvo pool. 
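 * The pool holds moea64_bpvo_pool_size entries; that size can be changed
 * with the machdep.moea64_bpvo_pool_size tunable.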
798 */ 799 moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( 800 moea64_bpvo_pool_size*sizeof(struct pvo_entry), 0); 801 moea64_bpvo_pool_index = 0; 802 803 /* 804 * Make sure kernel vsid is allocated as well as VSID 0. 805 */ 806 #ifndef __powerpc64__ 807 moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] 808 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); 809 moea64_vsid_bitmap[0] |= 1; 810 #endif 811 812 /* 813 * Initialize the kernel pmap (which is statically allocated). 814 */ 815 #ifdef __powerpc64__ 816 for (i = 0; i < 64; i++) { 817 pcpup->pc_slb[i].slbv = 0; 818 pcpup->pc_slb[i].slbe = 0; 819 } 820 #else 821 for (i = 0; i < 16; i++) 822 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; 823 #endif 824 825 kernel_pmap->pmap_phys = kernel_pmap; 826 CPU_FILL(&kernel_pmap->pm_active); 827 RB_INIT(&kernel_pmap->pmap_pvo); 828 829 PMAP_LOCK_INIT(kernel_pmap); 830 831 /* 832 * Now map in all the other buffers we allocated earlier 833 */ 834 835 moea64_setup_direct_map(mmup, kernelstart, kernelend); 836 } 837 838 void 839 moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 840 { 841 ihandle_t mmui; 842 phandle_t chosen; 843 phandle_t mmu; 844 ssize_t sz; 845 int i; 846 vm_offset_t pa, va; 847 void *dpcpu; 848 849 /* 850 * Set up the Open Firmware pmap and add its mappings if not in real 851 * mode. 852 */ 853 854 chosen = OF_finddevice("/chosen"); 855 if (!ofw_real_mode && chosen != -1 && 856 OF_getprop(chosen, "mmu", &mmui, 4) != -1) { 857 mmu = OF_instance_to_package(mmui); 858 if (mmu == -1 || 859 (sz = OF_getproplen(mmu, "translations")) == -1) 860 sz = 0; 861 if (sz > 6144 /* tmpstksz - 2 KB headroom */) 862 panic("moea64_bootstrap: too many ofw translations"); 863 864 if (sz > 0) 865 moea64_add_ofw_mappings(mmup, mmu, sz); 866 } 867 868 /* 869 * Calculate the last available physical address. 870 */ 871 for (i = 0; phys_avail[i + 2] != 0; i += 2) 872 ; 873 Maxmem = powerpc_btop(phys_avail[i + 1]); 874 875 /* 876 * Initialize MMU and remap early physical mappings 877 */ 878 MMU_CPU_BOOTSTRAP(mmup,0); 879 mtmsr(mfmsr() | PSL_DR | PSL_IR); 880 pmap_bootstrapped++; 881 bs_remap_earlyboot(); 882 883 /* 884 * Set the start and end of kva. 885 */ 886 virtual_avail = VM_MIN_KERNEL_ADDRESS; 887 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; 888 889 /* 890 * Map the entire KVA range into the SLB. We must not fault there. 891 */ 892 #ifdef __powerpc64__ 893 for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) 894 moea64_bootstrap_slb_prefault(va, 0); 895 #endif 896 897 /* 898 * Figure out how far we can extend virtual_end into segment 16 899 * without running into existing mappings. Segment 16 is guaranteed 900 * to contain neither RAM nor devices (at least on Apple hardware), 901 * but will generally contain some OFW mappings we should not 902 * step on. 903 */ 904 905 #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ 906 PMAP_LOCK(kernel_pmap); 907 while (virtual_end < VM_MAX_KERNEL_ADDRESS && 908 moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) 909 virtual_end += PAGE_SIZE; 910 PMAP_UNLOCK(kernel_pmap); 911 #endif 912 913 /* 914 * Allocate a kernel stack with a guard page for thread0 and map it 915 * into the kernel page map. 
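 * The guard pages below the stack are deliberately left unmapped so that
 * a kernel stack overflow faults instead of silently overwriting adjacent
 * memory.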
916 */ 917 pa = moea64_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); 918 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 919 virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; 920 CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); 921 thread0.td_kstack = va; 922 thread0.td_kstack_pages = KSTACK_PAGES; 923 for (i = 0; i < KSTACK_PAGES; i++) { 924 moea64_kenter(mmup, va, pa); 925 pa += PAGE_SIZE; 926 va += PAGE_SIZE; 927 } 928 929 /* 930 * Allocate virtual address space for the message buffer. 931 */ 932 pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); 933 msgbufp = (struct msgbuf *)virtual_avail; 934 va = virtual_avail; 935 virtual_avail += round_page(msgbufsize); 936 while (va < virtual_avail) { 937 moea64_kenter(mmup, va, pa); 938 pa += PAGE_SIZE; 939 va += PAGE_SIZE; 940 } 941 942 /* 943 * Allocate virtual address space for the dynamic percpu area. 944 */ 945 pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); 946 dpcpu = (void *)virtual_avail; 947 va = virtual_avail; 948 virtual_avail += DPCPU_SIZE; 949 while (va < virtual_avail) { 950 moea64_kenter(mmup, va, pa); 951 pa += PAGE_SIZE; 952 va += PAGE_SIZE; 953 } 954 dpcpu_init(dpcpu, 0); 955 956 /* 957 * Allocate some things for page zeroing. We put this directly 958 * in the page table and use MOEA64_PTE_REPLACE to avoid any 959 * of the PVO book-keeping or other parts of the VM system 960 * from even knowing that this hack exists. 961 */ 962 963 if (!hw_direct_map) { 964 mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, 965 MTX_DEF); 966 for (i = 0; i < 2; i++) { 967 moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; 968 virtual_end -= PAGE_SIZE; 969 970 moea64_kenter(mmup, moea64_scratchpage_va[i], 0); 971 972 PMAP_LOCK(kernel_pmap); 973 moea64_scratchpage_pvo[i] = moea64_pvo_find_va( 974 kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); 975 PMAP_UNLOCK(kernel_pmap); 976 } 977 } 978 } 979 980 /* 981 * Activate a user pmap. This mostly involves setting some non-CPU 982 * state. 
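 * On powerpc64 the user segment's SLB entry is installed directly with
 * slbmte; on 32-bit CPUs the user segment register is loaded with mtsrin
 * instead.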
983 */ 984 void 985 moea64_activate(mmu_t mmu, struct thread *td) 986 { 987 pmap_t pm; 988 989 pm = &td->td_proc->p_vmspace->vm_pmap; 990 CPU_SET(PCPU_GET(cpuid), &pm->pm_active); 991 992 #ifdef __powerpc64__ 993 PCPU_SET(userslb, pm->pm_slb); 994 __asm __volatile("slbmte %0, %1; isync" :: 995 "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE)); 996 #else 997 PCPU_SET(curpmap, pm->pmap_phys); 998 mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid); 999 #endif 1000 } 1001 1002 void 1003 moea64_deactivate(mmu_t mmu, struct thread *td) 1004 { 1005 pmap_t pm; 1006 1007 __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR)); 1008 1009 pm = &td->td_proc->p_vmspace->vm_pmap; 1010 CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); 1011 #ifdef __powerpc64__ 1012 PCPU_SET(userslb, NULL); 1013 #else 1014 PCPU_SET(curpmap, NULL); 1015 #endif 1016 } 1017 1018 void 1019 moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) 1020 { 1021 struct pvo_entry key, *pvo; 1022 vm_page_t m; 1023 int64_t refchg; 1024 1025 key.pvo_vaddr = sva; 1026 PMAP_LOCK(pm); 1027 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); 1028 pvo != NULL && PVO_VADDR(pvo) < eva; 1029 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { 1030 if ((pvo->pvo_vaddr & PVO_WIRED) == 0) 1031 panic("moea64_unwire: pvo %p is missing PVO_WIRED", 1032 pvo); 1033 pvo->pvo_vaddr &= ~PVO_WIRED; 1034 refchg = MOEA64_PTE_REPLACE(mmu, pvo, 0 /* No invalidation */); 1035 if ((pvo->pvo_vaddr & PVO_MANAGED) && 1036 (pvo->pvo_pte.prot & VM_PROT_WRITE)) { 1037 if (refchg < 0) 1038 refchg = LPTE_CHG; 1039 m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); 1040 1041 refchg |= atomic_readandclear_32(&m->md.mdpg_attrs); 1042 if (refchg & LPTE_CHG) 1043 vm_page_dirty(m); 1044 if (refchg & LPTE_REF) 1045 vm_page_aflag_set(m, PGA_REFERENCED); 1046 } 1047 pm->pm_stats.wired_count--; 1048 } 1049 PMAP_UNLOCK(pm); 1050 } 1051 1052 /* 1053 * This goes through and sets the physical address of our 1054 * special scratch PTE to the PA we want to zero or copy. 
Because 1055 * of locking issues (this can get called in pvo_enter() by 1056 * the UMA allocator), we can't use most other utility functions here 1057 */ 1058 1059 static __inline 1060 void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_offset_t pa) { 1061 1062 KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); 1063 mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); 1064 1065 moea64_scratchpage_pvo[which]->pvo_pte.pa = 1066 moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; 1067 MOEA64_PTE_REPLACE(mmup, moea64_scratchpage_pvo[which], 1068 MOEA64_PTE_INVALIDATE); 1069 isync(); 1070 } 1071 1072 void 1073 moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) 1074 { 1075 vm_offset_t dst; 1076 vm_offset_t src; 1077 1078 dst = VM_PAGE_TO_PHYS(mdst); 1079 src = VM_PAGE_TO_PHYS(msrc); 1080 1081 if (hw_direct_map) { 1082 bcopy((void *)src, (void *)dst, PAGE_SIZE); 1083 } else { 1084 mtx_lock(&moea64_scratchpage_mtx); 1085 1086 moea64_set_scratchpage_pa(mmu, 0, src); 1087 moea64_set_scratchpage_pa(mmu, 1, dst); 1088 1089 bcopy((void *)moea64_scratchpage_va[0], 1090 (void *)moea64_scratchpage_va[1], PAGE_SIZE); 1091 1092 mtx_unlock(&moea64_scratchpage_mtx); 1093 } 1094 } 1095 1096 static inline void 1097 moea64_copy_pages_dmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 1098 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 1099 { 1100 void *a_cp, *b_cp; 1101 vm_offset_t a_pg_offset, b_pg_offset; 1102 int cnt; 1103 1104 while (xfersize > 0) { 1105 a_pg_offset = a_offset & PAGE_MASK; 1106 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 1107 a_cp = (char *)VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]) + 1108 a_pg_offset; 1109 b_pg_offset = b_offset & PAGE_MASK; 1110 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 1111 b_cp = (char *)VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]) + 1112 b_pg_offset; 1113 bcopy(a_cp, b_cp, cnt); 1114 a_offset += cnt; 1115 b_offset += cnt; 1116 xfersize -= cnt; 1117 } 1118 } 1119 1120 static inline void 1121 moea64_copy_pages_nodmap(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 1122 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 1123 { 1124 void *a_cp, *b_cp; 1125 vm_offset_t a_pg_offset, b_pg_offset; 1126 int cnt; 1127 1128 mtx_lock(&moea64_scratchpage_mtx); 1129 while (xfersize > 0) { 1130 a_pg_offset = a_offset & PAGE_MASK; 1131 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 1132 moea64_set_scratchpage_pa(mmu, 0, 1133 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])); 1134 a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset; 1135 b_pg_offset = b_offset & PAGE_MASK; 1136 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 1137 moea64_set_scratchpage_pa(mmu, 1, 1138 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])); 1139 b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset; 1140 bcopy(a_cp, b_cp, cnt); 1141 a_offset += cnt; 1142 b_offset += cnt; 1143 xfersize -= cnt; 1144 } 1145 mtx_unlock(&moea64_scratchpage_mtx); 1146 } 1147 1148 void 1149 moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, 1150 vm_page_t *mb, vm_offset_t b_offset, int xfersize) 1151 { 1152 1153 if (hw_direct_map) { 1154 moea64_copy_pages_dmap(mmu, ma, a_offset, mb, b_offset, 1155 xfersize); 1156 } else { 1157 moea64_copy_pages_nodmap(mmu, ma, a_offset, mb, b_offset, 1158 xfersize); 1159 } 1160 } 1161 1162 void 1163 moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 1164 { 1165 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1166 1167 if (size + off > PAGE_SIZE) 1168 panic("moea64_zero_page: size + off > PAGE_SIZE"); 1169 1170 if (hw_direct_map) { 1171 bzero((caddr_t)pa + 
off, size); 1172 } else { 1173 mtx_lock(&moea64_scratchpage_mtx); 1174 moea64_set_scratchpage_pa(mmu, 0, pa); 1175 bzero((caddr_t)moea64_scratchpage_va[0] + off, size); 1176 mtx_unlock(&moea64_scratchpage_mtx); 1177 } 1178 } 1179 1180 /* 1181 * Zero a page of physical memory by temporarily mapping it 1182 */ 1183 void 1184 moea64_zero_page(mmu_t mmu, vm_page_t m) 1185 { 1186 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1187 vm_offset_t va, off; 1188 1189 if (!hw_direct_map) { 1190 mtx_lock(&moea64_scratchpage_mtx); 1191 1192 moea64_set_scratchpage_pa(mmu, 0, pa); 1193 va = moea64_scratchpage_va[0]; 1194 } else { 1195 va = pa; 1196 } 1197 1198 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 1199 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 1200 1201 if (!hw_direct_map) 1202 mtx_unlock(&moea64_scratchpage_mtx); 1203 } 1204 1205 void 1206 moea64_zero_page_idle(mmu_t mmu, vm_page_t m) 1207 { 1208 1209 moea64_zero_page(mmu, m); 1210 } 1211 1212 /* 1213 * Map the given physical page at the specified virtual address in the 1214 * target pmap with the protection requested. If specified the page 1215 * will be wired down. 1216 */ 1217 1218 int 1219 moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1220 vm_prot_t prot, u_int flags, int8_t psind) 1221 { 1222 struct pvo_entry *pvo, *oldpvo; 1223 struct pvo_head *pvo_head; 1224 uint64_t pte_lo; 1225 int error; 1226 1227 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 1228 VM_OBJECT_ASSERT_LOCKED(m->object); 1229 1230 pvo = alloc_pvo_entry(0); 1231 pvo->pvo_pmap = NULL; /* to be filled in later */ 1232 pvo->pvo_pte.prot = prot; 1233 1234 pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); 1235 pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; 1236 1237 if ((flags & PMAP_ENTER_WIRED) != 0) 1238 pvo->pvo_vaddr |= PVO_WIRED; 1239 1240 if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) { 1241 pvo_head = NULL; 1242 } else { 1243 pvo_head = &m->md.mdpg_pvoh; 1244 pvo->pvo_vaddr |= PVO_MANAGED; 1245 } 1246 1247 for (;;) { 1248 PV_PAGE_LOCK(m); 1249 PMAP_LOCK(pmap); 1250 if (pvo->pvo_pmap == NULL) 1251 init_pvo_entry(pvo, pmap, va); 1252 if (prot & VM_PROT_WRITE) 1253 if (pmap_bootstrapped && 1254 (m->oflags & VPO_UNMANAGED) == 0) 1255 vm_page_aflag_set(m, PGA_WRITEABLE); 1256 1257 oldpvo = moea64_pvo_find_va(pmap, va); 1258 if (oldpvo != NULL) { 1259 if (oldpvo->pvo_vaddr == pvo->pvo_vaddr && 1260 oldpvo->pvo_pte.pa == pvo->pvo_pte.pa && 1261 oldpvo->pvo_pte.prot == prot) { 1262 /* Identical mapping already exists */ 1263 error = 0; 1264 1265 /* If not in page table, reinsert it */ 1266 if (MOEA64_PTE_SYNCH(mmu, oldpvo) < 0) { 1267 moea64_pte_overflow--; 1268 MOEA64_PTE_INSERT(mmu, oldpvo); 1269 } 1270 1271 /* Then just clean up and go home */ 1272 PV_PAGE_UNLOCK(m); 1273 PMAP_UNLOCK(pmap); 1274 free_pvo_entry(pvo); 1275 break; 1276 } 1277 1278 /* Otherwise, need to kill it first */ 1279 KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old " 1280 "mapping does not match new mapping")); 1281 moea64_pvo_remove_from_pmap(mmu, oldpvo); 1282 } 1283 error = moea64_pvo_enter(mmu, pvo, pvo_head); 1284 PV_PAGE_UNLOCK(m); 1285 PMAP_UNLOCK(pmap); 1286 1287 /* Free any dead pages */ 1288 if (oldpvo != NULL) { 1289 PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1290 moea64_pvo_remove_from_page(mmu, oldpvo); 1291 PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1292 free_pvo_entry(oldpvo); 1293 } 1294 1295 if (error != ENOMEM) 1296 break; 1297 if ((flags & PMAP_ENTER_NOSLEEP) != 0) 1298 return (KERN_RESOURCE_SHORTAGE); 1299 
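		/*
		 * The mapping could not be entered for lack of memory and
		 * sleeping is permitted: wait for free pages (the object
		 * must be unlocked across the wait) and retry.
		 */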
VM_OBJECT_ASSERT_UNLOCKED(m->object); 1300 VM_WAIT; 1301 } 1302 1303 /* 1304 * Flush the page from the instruction cache if this page is 1305 * mapped executable and cacheable. 1306 */ 1307 if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && 1308 (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { 1309 vm_page_aflag_set(m, PGA_EXECUTABLE); 1310 moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); 1311 } 1312 return (KERN_SUCCESS); 1313 } 1314 1315 static void 1316 moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t pa, 1317 vm_size_t sz) 1318 { 1319 1320 /* 1321 * This is much trickier than on older systems because 1322 * we can't sync the icache on physical addresses directly 1323 * without a direct map. Instead we check a couple of cases 1324 * where the memory is already mapped in and, failing that, 1325 * use the same trick we use for page zeroing to create 1326 * a temporary mapping for this physical address. 1327 */ 1328 1329 if (!pmap_bootstrapped) { 1330 /* 1331 * If PMAP is not bootstrapped, we are likely to be 1332 * in real mode. 1333 */ 1334 __syncicache((void *)pa, sz); 1335 } else if (pmap == kernel_pmap) { 1336 __syncicache((void *)va, sz); 1337 } else if (hw_direct_map) { 1338 __syncicache((void *)pa, sz); 1339 } else { 1340 /* Use the scratch page to set up a temp mapping */ 1341 1342 mtx_lock(&moea64_scratchpage_mtx); 1343 1344 moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); 1345 __syncicache((void *)(moea64_scratchpage_va[1] + 1346 (va & ADDR_POFF)), sz); 1347 1348 mtx_unlock(&moea64_scratchpage_mtx); 1349 } 1350 } 1351 1352 /* 1353 * Maps a sequence of resident pages belonging to the same object. 1354 * The sequence begins with the given page m_start. This page is 1355 * mapped at the given virtual address start. Each subsequent page is 1356 * mapped at a virtual address that is offset from start by the same 1357 * amount as the page is offset from m_start within the object. The 1358 * last page in the sequence is the page with the largest offset from 1359 * m_start that can be mapped at a virtual address less than the given 1360 * virtual address end. Not every virtual page between start and end 1361 * is mapped; only those for which a resident page exists with the 1362 * corresponding offset from m_start are mapped. 
1363 */ 1364 void 1365 moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, 1366 vm_page_t m_start, vm_prot_t prot) 1367 { 1368 vm_page_t m; 1369 vm_pindex_t diff, psize; 1370 1371 VM_OBJECT_ASSERT_LOCKED(m_start->object); 1372 1373 psize = atop(end - start); 1374 m = m_start; 1375 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1376 moea64_enter(mmu, pm, start + ptoa(diff), m, prot & 1377 (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP, 0); 1378 m = TAILQ_NEXT(m, listq); 1379 } 1380 } 1381 1382 void 1383 moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, 1384 vm_prot_t prot) 1385 { 1386 1387 moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), 1388 PMAP_ENTER_NOSLEEP, 0); 1389 } 1390 1391 vm_paddr_t 1392 moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) 1393 { 1394 struct pvo_entry *pvo; 1395 vm_paddr_t pa; 1396 1397 PMAP_LOCK(pm); 1398 pvo = moea64_pvo_find_va(pm, va); 1399 if (pvo == NULL) 1400 pa = 0; 1401 else 1402 pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); 1403 PMAP_UNLOCK(pm); 1404 1405 return (pa); 1406 } 1407 1408 /* 1409 * Atomically extract and hold the physical page with the given 1410 * pmap and virtual address pair if that mapping permits the given 1411 * protection. 1412 */ 1413 vm_page_t 1414 moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1415 { 1416 struct pvo_entry *pvo; 1417 vm_page_t m; 1418 vm_paddr_t pa; 1419 1420 m = NULL; 1421 pa = 0; 1422 PMAP_LOCK(pmap); 1423 retry: 1424 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); 1425 if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { 1426 if (vm_page_pa_tryrelock(pmap, 1427 pvo->pvo_pte.pa & LPTE_RPGN, &pa)) 1428 goto retry; 1429 m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); 1430 vm_page_hold(m); 1431 } 1432 PA_UNLOCK_COND(pa); 1433 PMAP_UNLOCK(pmap); 1434 return (m); 1435 } 1436 1437 static mmu_t installed_mmu; 1438 1439 static void * 1440 moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 1441 { 1442 struct pvo_entry *pvo; 1443 vm_offset_t va; 1444 vm_page_t m; 1445 int pflags, needed_lock; 1446 1447 /* 1448 * This entire routine is a horrible hack to avoid bothering kmem 1449 * for new KVA addresses. Because this can get called from inside 1450 * kmem allocation routines, calling kmem for a new address here 1451 * can lead to multiply locking non-recursive mutexes. 
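 * Instead, the physical address of the freshly allocated page is used
 * directly as its kernel virtual address, with a wired 1:1 mapping
 * entered by hand below.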
1452 */ 1453 1454 *flags = UMA_SLAB_PRIV; 1455 needed_lock = !PMAP_LOCKED(kernel_pmap); 1456 pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; 1457 1458 for (;;) { 1459 m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); 1460 if (m == NULL) { 1461 if (wait & M_NOWAIT) 1462 return (NULL); 1463 VM_WAIT; 1464 } else 1465 break; 1466 } 1467 1468 va = VM_PAGE_TO_PHYS(m); 1469 1470 pvo = alloc_pvo_entry(1 /* bootstrap */); 1471 1472 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE; 1473 pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M; 1474 1475 if (needed_lock) 1476 PMAP_LOCK(kernel_pmap); 1477 1478 init_pvo_entry(pvo, kernel_pmap, va); 1479 pvo->pvo_vaddr |= PVO_WIRED; 1480 1481 moea64_pvo_enter(installed_mmu, pvo, NULL); 1482 1483 if (needed_lock) 1484 PMAP_UNLOCK(kernel_pmap); 1485 1486 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) 1487 bzero((void *)va, PAGE_SIZE); 1488 1489 return (void *)va; 1490 } 1491 1492 extern int elf32_nxstack; 1493 1494 void 1495 moea64_init(mmu_t mmu) 1496 { 1497 1498 CTR0(KTR_PMAP, "moea64_init"); 1499 1500 moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), 1501 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1502 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1503 1504 if (!hw_direct_map) { 1505 installed_mmu = mmu; 1506 uma_zone_set_allocf(moea64_pvo_zone,moea64_uma_page_alloc); 1507 } 1508 1509 #ifdef COMPAT_FREEBSD32 1510 elf32_nxstack = 1; 1511 #endif 1512 1513 moea64_initialized = TRUE; 1514 } 1515 1516 boolean_t 1517 moea64_is_referenced(mmu_t mmu, vm_page_t m) 1518 { 1519 1520 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1521 ("moea64_is_referenced: page %p is not managed", m)); 1522 1523 return (moea64_query_bit(mmu, m, LPTE_REF)); 1524 } 1525 1526 boolean_t 1527 moea64_is_modified(mmu_t mmu, vm_page_t m) 1528 { 1529 1530 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1531 ("moea64_is_modified: page %p is not managed", m)); 1532 1533 /* 1534 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 1535 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 1536 * is clear, no PTEs can have LPTE_CHG set. 1537 */ 1538 VM_OBJECT_ASSERT_LOCKED(m->object); 1539 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 1540 return (FALSE); 1541 return (moea64_query_bit(mmu, m, LPTE_CHG)); 1542 } 1543 1544 boolean_t 1545 moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1546 { 1547 struct pvo_entry *pvo; 1548 boolean_t rv = TRUE; 1549 1550 PMAP_LOCK(pmap); 1551 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); 1552 if (pvo != NULL) 1553 rv = FALSE; 1554 PMAP_UNLOCK(pmap); 1555 return (rv); 1556 } 1557 1558 void 1559 moea64_clear_modify(mmu_t mmu, vm_page_t m) 1560 { 1561 1562 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1563 ("moea64_clear_modify: page %p is not managed", m)); 1564 VM_OBJECT_ASSERT_WLOCKED(m->object); 1565 KASSERT(!vm_page_xbusied(m), 1566 ("moea64_clear_modify: page %p is exclusive busied", m)); 1567 1568 /* 1569 * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG 1570 * set. If the object containing the page is locked and the page is 1571 * not exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 1572 */ 1573 if ((m->aflags & PGA_WRITEABLE) == 0) 1574 return; 1575 moea64_clear_bit(mmu, m, LPTE_CHG); 1576 } 1577 1578 /* 1579 * Clear the write and modified bits in each of the given page's mappings. 
1580 */ 1581 void 1582 moea64_remove_write(mmu_t mmu, vm_page_t m) 1583 { 1584 struct pvo_entry *pvo; 1585 int64_t refchg, ret; 1586 pmap_t pmap; 1587 1588 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1589 ("moea64_remove_write: page %p is not managed", m)); 1590 1591 /* 1592 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 1593 * set by another thread while the object is locked. Thus, 1594 * if PGA_WRITEABLE is clear, no page table entries need updating. 1595 */ 1596 VM_OBJECT_ASSERT_WLOCKED(m->object); 1597 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 1598 return; 1599 powerpc_sync(); 1600 PV_PAGE_LOCK(m); 1601 refchg = 0; 1602 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1603 pmap = pvo->pvo_pmap; 1604 PMAP_LOCK(pmap); 1605 if (!(pvo->pvo_vaddr & PVO_DEAD) && 1606 (pvo->pvo_pte.prot & VM_PROT_WRITE)) { 1607 pvo->pvo_pte.prot &= ~VM_PROT_WRITE; 1608 ret = MOEA64_PTE_REPLACE(mmu, pvo, 1609 MOEA64_PTE_PROT_UPDATE); 1610 if (ret < 0) 1611 ret = LPTE_CHG; 1612 refchg |= ret; 1613 if (pvo->pvo_pmap == kernel_pmap) 1614 isync(); 1615 } 1616 PMAP_UNLOCK(pmap); 1617 } 1618 if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG) 1619 vm_page_dirty(m); 1620 vm_page_aflag_clear(m, PGA_WRITEABLE); 1621 PV_PAGE_UNLOCK(m); 1622 } 1623 1624 /* 1625 * moea64_ts_referenced: 1626 * 1627 * Return a count of reference bits for a page, clearing those bits. 1628 * It is not necessary for every reference bit to be cleared, but it 1629 * is necessary that 0 only be returned when there are truly no 1630 * reference bits set. 1631 * 1632 * XXX: The exact number of bits to check and clear is a matter that 1633 * should be tested and standardized at some point in the future for 1634 * optimal aging of shared pages. 1635 */ 1636 int 1637 moea64_ts_referenced(mmu_t mmu, vm_page_t m) 1638 { 1639 1640 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1641 ("moea64_ts_referenced: page %p is not managed", m)); 1642 return (moea64_clear_bit(mmu, m, LPTE_REF)); 1643 } 1644 1645 /* 1646 * Modify the WIMG settings of all mappings for a page. 1647 */ 1648 void 1649 moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) 1650 { 1651 struct pvo_entry *pvo; 1652 int64_t refchg; 1653 pmap_t pmap; 1654 uint64_t lo; 1655 1656 if ((m->oflags & VPO_UNMANAGED) != 0) { 1657 m->md.mdpg_cache_attrs = ma; 1658 return; 1659 } 1660 1661 lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); 1662 1663 PV_PAGE_LOCK(m); 1664 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1665 pmap = pvo->pvo_pmap; 1666 PMAP_LOCK(pmap); 1667 if (!(pvo->pvo_vaddr & PVO_DEAD)) { 1668 pvo->pvo_pte.pa &= ~LPTE_WIMG; 1669 pvo->pvo_pte.pa |= lo; 1670 refchg = MOEA64_PTE_REPLACE(mmu, pvo, 1671 MOEA64_PTE_INVALIDATE); 1672 if (refchg < 0) 1673 refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ? 1674 LPTE_CHG : 0; 1675 if ((pvo->pvo_vaddr & PVO_MANAGED) && 1676 (pvo->pvo_pte.prot & VM_PROT_WRITE)) { 1677 refchg |= 1678 atomic_readandclear_32(&m->md.mdpg_attrs); 1679 if (refchg & LPTE_CHG) 1680 vm_page_dirty(m); 1681 if (refchg & LPTE_REF) 1682 vm_page_aflag_set(m, PGA_REFERENCED); 1683 } 1684 if (pvo->pvo_pmap == kernel_pmap) 1685 isync(); 1686 } 1687 PMAP_UNLOCK(pmap); 1688 } 1689 m->md.mdpg_cache_attrs = ma; 1690 PV_PAGE_UNLOCK(m); 1691 } 1692 1693 /* 1694 * Map a wired page into kernel virtual address space. 
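 * Any existing mapping at the given virtual address is replaced.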
1695 */ 1696 void 1697 moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) 1698 { 1699 int error; 1700 struct pvo_entry *pvo, *oldpvo; 1701 1702 pvo = alloc_pvo_entry(0); 1703 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 1704 pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma); 1705 pvo->pvo_vaddr |= PVO_WIRED; 1706 1707 PMAP_LOCK(kernel_pmap); 1708 oldpvo = moea64_pvo_find_va(kernel_pmap, va); 1709 if (oldpvo != NULL) 1710 moea64_pvo_remove_from_pmap(mmu, oldpvo); 1711 init_pvo_entry(pvo, kernel_pmap, va); 1712 error = moea64_pvo_enter(mmu, pvo, NULL); 1713 PMAP_UNLOCK(kernel_pmap); 1714 1715 /* Free any dead pages */ 1716 if (oldpvo != NULL) { 1717 PV_LOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1718 moea64_pvo_remove_from_page(mmu, oldpvo); 1719 PV_UNLOCK(oldpvo->pvo_pte.pa & LPTE_RPGN); 1720 free_pvo_entry(oldpvo); 1721 } 1722 1723 if (error != 0 && error != ENOENT) 1724 panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, 1725 pa, error); 1726 } 1727 1728 void 1729 moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) 1730 { 1731 1732 moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 1733 } 1734 1735 /* 1736 * Extract the physical page address associated with the given kernel virtual 1737 * address. 1738 */ 1739 vm_paddr_t 1740 moea64_kextract(mmu_t mmu, vm_offset_t va) 1741 { 1742 struct pvo_entry *pvo; 1743 vm_paddr_t pa; 1744 1745 /* 1746 * Shortcut the direct-mapped case when applicable. We never put 1747 * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. 1748 */ 1749 if (va < VM_MIN_KERNEL_ADDRESS) 1750 return (va); 1751 1752 PMAP_LOCK(kernel_pmap); 1753 pvo = moea64_pvo_find_va(kernel_pmap, va); 1754 KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, 1755 va)); 1756 pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va - PVO_VADDR(pvo)); 1757 PMAP_UNLOCK(kernel_pmap); 1758 return (pa); 1759 } 1760 1761 /* 1762 * Remove a wired page from kernel virtual address space. 1763 */ 1764 void 1765 moea64_kremove(mmu_t mmu, vm_offset_t va) 1766 { 1767 moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); 1768 } 1769 1770 /* 1771 * Map a range of physical addresses into kernel virtual address space. 1772 * 1773 * The value passed in *virt is a suggested virtual address for the mapping. 1774 * Architectures which can support a direct-mapped physical to virtual region 1775 * can return the appropriate address within that region, leaving '*virt' 1776 * unchanged. Other architectures should map the pages starting at '*virt' and 1777 * update '*virt' with the first usable address after the mapped region. 1778 */ 1779 vm_offset_t 1780 moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, 1781 vm_paddr_t pa_end, int prot) 1782 { 1783 vm_offset_t sva, va; 1784 1785 if (hw_direct_map) { 1786 /* 1787 * Check if every page in the region is covered by the direct 1788 * map. The direct map covers all of physical memory. Use 1789 * moea64_calc_wimg() as a shortcut to see if the page is in 1790 * physical memory as a way to see if the direct map covers it. 
		 */
		for (va = pa_start; va < pa_end; va += PAGE_SIZE)
			if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M)
				break;
		if (va == pa_end)
			return (pa_start);
	}
	sva = *virt;
	va = sva;
	/* XXX respect prot argument */
	for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
		moea64_kenter(mmu, va, pa_start);
	*virt = va;

	return (sva);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page. This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
{
	int loops;
	struct pvo_entry *pvo;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("moea64_page_exists_quick: page %p is not managed", m));
	loops = 0;
	rv = FALSE;
	PV_PAGE_LOCK(m);
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
		if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) {
			rv = TRUE;
			break;
		}
		if (++loops >= 16)
			break;
	}
	PV_PAGE_UNLOCK(m);
	return (rv);
}

/*
 * Return the number of managed mappings to the given physical page
 * that are wired.
 */
int
moea64_page_wired_mappings(mmu_t mmu, vm_page_t m)
{
	struct pvo_entry *pvo;
	int count;

	count = 0;
	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (count);
	PV_PAGE_LOCK(m);
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
		if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED)
			count++;
	PV_PAGE_UNLOCK(m);
	return (count);
}

static uintptr_t	moea64_vsidcontext;

uintptr_t
moea64_get_unique_vsid(void) {
	u_int entropy;
	register_t hash;
	uint32_t mask;
	int i;

	entropy = 0;
	__asm __volatile("mftb %0" : "=r"(entropy));

	mtx_lock(&moea64_slb_mutex);
	for (i = 0; i < NVSIDS; i += VSID_NBPW) {
		u_int	n;

		/*
		 * Create a new value by multiplying by a prime and adding in
		 * entropy from the timebase register. This is to make the
		 * VSID more random so that the PT hash function collides
		 * less often. (Note that the prime causes gcc to do shifts
		 * instead of a multiply.)
		 */
		moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
		hash = moea64_vsidcontext & (NVSIDS - 1);
		if (hash == 0)		/* 0 is special, avoid it */
			continue;
		n = hash >> 5;
		mask = 1 << (hash & (VSID_NBPW - 1));
		hash = (moea64_vsidcontext & VSID_HASHMASK);
		if (moea64_vsid_bitmap[n] & mask) {	/* collision? */
			/* anything free in this bucket?
static uintptr_t moea64_vsidcontext;

uintptr_t
moea64_get_unique_vsid(void)
{
	u_int entropy;
	register_t hash;
	uint32_t mask;
	int i;

	entropy = 0;
	__asm __volatile("mftb %0" : "=r"(entropy));

	mtx_lock(&moea64_slb_mutex);
	for (i = 0; i < NVSIDS; i += VSID_NBPW) {
		u_int n;

		/*
		 * Create a new value by multiplying by a prime and adding in
		 * entropy from the timebase register.  This is to make the
		 * VSID more random so that the PT hash function collides
		 * less often.  (Note that the prime causes gcc to do shifts
		 * instead of a multiply.)
		 */
		moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
		hash = moea64_vsidcontext & (NVSIDS - 1);
		if (hash == 0)		/* 0 is special, avoid it */
			continue;
		n = hash >> 5;
		mask = 1 << (hash & (VSID_NBPW - 1));
		hash = (moea64_vsidcontext & VSID_HASHMASK);
		if (moea64_vsid_bitmap[n] & mask) {	/* collision? */
			/* anything free in this bucket? */
			if (moea64_vsid_bitmap[n] == 0xffffffff) {
				entropy = (moea64_vsidcontext >> 20);
				continue;
			}
			i = ffs(~moea64_vsid_bitmap[n]) - 1;
			mask = 1 << i;
			hash &= VSID_HASHMASK & ~(VSID_NBPW - 1);
			hash |= i;
		}
		KASSERT(!(moea64_vsid_bitmap[n] & mask),
		    ("Allocating in-use VSID %#zx\n", hash));
		moea64_vsid_bitmap[n] |= mask;
		mtx_unlock(&moea64_slb_mutex);
		return (hash);
	}

	mtx_unlock(&moea64_slb_mutex);
	panic("%s: out of segments", __func__);
}

#ifdef __powerpc64__
void
moea64_pinit(mmu_t mmu, pmap_t pmap)
{

	RB_INIT(&pmap->pmap_pvo);

	pmap->pm_slb_tree_root = slb_alloc_tree();
	pmap->pm_slb = slb_alloc_user_cache();
	pmap->pm_slb_len = 0;
}
#else
void
moea64_pinit(mmu_t mmu, pmap_t pmap)
{
	int i;
	uint32_t hash;

	RB_INIT(&pmap->pmap_pvo);

	if (pmap_bootstrapped)
		pmap->pmap_phys = (pmap_t)moea64_kextract(mmu,
		    (vm_offset_t)pmap);
	else
		pmap->pmap_phys = pmap;

	/*
	 * Allocate some segment registers for this pmap.
	 */
	hash = moea64_get_unique_vsid();

	for (i = 0; i < 16; i++)
		pmap->pm_sr[i] = VSID_MAKE(i, hash);

	KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0"));
}
#endif

/*
 * Initialize the pmap associated with process 0.
 */
void
moea64_pinit0(mmu_t mmu, pmap_t pm)
{

	PMAP_LOCK_INIT(pm);
	moea64_pinit(mmu, pm);
	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
}

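/*
 * Worked example (not from the original source), assuming VSID_NBPW is 32 as
 * the '>> 5' indexing in moea64_get_unique_vsid() implies: if the generator
 * yields the low-bits hash 0x2c7, then
 *
 *	n    = 0x2c7 >> 5        = 22		(word 22 of the bitmap)
 *	mask = 1 << (0x2c7 & 31) = 1 << 7
 *
 * so bit 7 of moea64_vsid_bitmap[22] records that the VSID is in use, and
 * moea64_release_vsid() later clears exactly that bit again.
 */
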
/*
 * Set the physical protection on the specified range of this map as
 * requested.
 */
static void
moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo,
    vm_prot_t prot)
{
	struct vm_page *pg;
	vm_prot_t oldprot;
	int32_t refchg;

	PMAP_LOCK_ASSERT(pm, MA_OWNED);

	/*
	 * Change the protection of the page.
	 */
	oldprot = pvo->pvo_pte.prot;
	pvo->pvo_pte.prot = prot;
	pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);

	/*
	 * If the PVO is in the page table, update the mapping.
	 */
	refchg = MOEA64_PTE_REPLACE(mmu, pvo, MOEA64_PTE_PROT_UPDATE);
	if (refchg < 0)
		refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0;

	if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) &&
	    (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
		if ((pg->oflags & VPO_UNMANAGED) == 0)
			vm_page_aflag_set(pg, PGA_EXECUTABLE);
		moea64_syncicache(mmu, pm, PVO_VADDR(pvo),
		    pvo->pvo_pte.pa & LPTE_RPGN, PAGE_SIZE);
	}

	/*
	 * Update vm about the REF/CHG bits if the page is managed and we have
	 * removed write access.
	 */
	if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) &&
	    (oldprot & VM_PROT_WRITE)) {
		refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
		if (refchg & LPTE_CHG)
			vm_page_dirty(pg);
		if (refchg & LPTE_REF)
			vm_page_aflag_set(pg, PGA_REFERENCED);
	}
}

void
moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
    vm_prot_t prot)
{
	struct pvo_entry *pvo, *tpvo, key;

	CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
	    sva, eva, prot);

	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
	    ("moea64_protect: non current pmap"));

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		moea64_remove(mmu, pm, sva, eva);
		return;
	}

	PMAP_LOCK(pm);
	key.pvo_vaddr = sva;
	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
		moea64_pvo_protect(mmu, pm, pvo, prot);
	}
	PMAP_UNLOCK(pm);
}

/*
 * Map a list of wired pages into kernel virtual address space.  This is
 * intended for temporary mappings which do not need page modification or
 * references recorded.  Existing mappings in the region are overwritten.
 */
void
moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count)
{
	while (count-- > 0) {
		moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
		va += PAGE_SIZE;
		m++;
	}
}

/*
 * Remove page mappings from kernel virtual address space.  Intended for
 * temporary mappings entered by moea64_qenter.
 */
void
moea64_qremove(mmu_t mmu, vm_offset_t va, int count)
{
	while (count-- > 0) {
		moea64_kremove(mmu, va);
		va += PAGE_SIZE;
	}
}

void
moea64_release_vsid(uint64_t vsid)
{
	int idx, mask;

	mtx_lock(&moea64_slb_mutex);
	idx = vsid & (NVSIDS - 1);
	mask = 1 << (idx % VSID_NBPW);
	idx /= VSID_NBPW;
	KASSERT(moea64_vsid_bitmap[idx] & mask,
	    ("Freeing unallocated VSID %#jx", vsid));
	moea64_vsid_bitmap[idx] &= ~mask;
	mtx_unlock(&moea64_slb_mutex);
}

void
moea64_release(mmu_t mmu, pmap_t pmap)
{

	/*
	 * Free segment registers' VSIDs.
	 */
#ifdef __powerpc64__
	slb_free_tree(pmap);
	slb_free_user_cache(pmap->pm_slb);
#else
	KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));

	moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
#endif
}

/*
 * Remove all pages mapped by the specified pmap.
 */
void
moea64_remove_pages(mmu_t mmu, pmap_t pm)
{
	struct pvo_entry *pvo, *tpvo;
	struct pvo_tree tofree;

	RB_INIT(&tofree);

	PMAP_LOCK(pm);
	RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
		if (pvo->pvo_vaddr & PVO_WIRED)
			continue;

		/*
		 * For locking reasons, remove this from the page table and
		 * pmap, but save delinking from the vm_page for a second
		 * pass.
		 */
		moea64_pvo_remove_from_pmap(mmu, pvo);
		RB_INSERT(pvo_tree, &tofree, pvo);
	}
	PMAP_UNLOCK(pm);

	RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) {
		PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN);
		moea64_pvo_remove_from_page(mmu, pvo);
		PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN);
		RB_REMOVE(pvo_tree, &tofree, pvo);
		free_pvo_entry(pvo);
	}
}

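/*
 * Illustrative sketch (not from the original source): moea64_qenter() and
 * moea64_qremove() are meant to bracket a temporary kernel window over a set
 * of pages, e.g.
 *
 *	moea64_qenter(mmu, win_va, pages, npages);
 *	... access the window at win_va ...
 *	moea64_qremove(mmu, win_va, npages);
 *
 * where 'win_va' is a hypothetical preallocated KVA range of at least
 * npages * PAGE_SIZE bytes.
 */
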
/*
 * Remove the given range of addresses from the specified map.
 */
void
moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{
	struct pvo_entry *pvo, *tpvo, key;
	struct pvo_tree tofree;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pm->pm_stats.resident_count == 0)
		return;

	key.pvo_vaddr = sva;

	RB_INIT(&tofree);

	PMAP_LOCK(pm);
	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);

		/*
		 * For locking reasons, remove this from the page table and
		 * pmap, but save delinking from the vm_page for a second
		 * pass.
		 */
		moea64_pvo_remove_from_pmap(mmu, pvo);
		RB_INSERT(pvo_tree, &tofree, pvo);
	}
	PMAP_UNLOCK(pm);

	RB_FOREACH_SAFE(pvo, pvo_tree, &tofree, tpvo) {
		PV_LOCK(pvo->pvo_pte.pa & LPTE_RPGN);
		moea64_pvo_remove_from_page(mmu, pvo);
		PV_UNLOCK(pvo->pvo_pte.pa & LPTE_RPGN);
		RB_REMOVE(pvo_tree, &tofree, pvo);
		free_pvo_entry(pvo);
	}
}

/*
 * Remove a physical page from all pmaps in which it resides.
 * moea64_pvo_remove_from_pmap() will reflect changes in the PTEs back to the
 * vm_page.
 */
void
moea64_remove_all(mmu_t mmu, vm_page_t m)
{
	struct pvo_entry *pvo, *next_pvo;
	struct pvo_head freequeue;
	int wasdead;
	pmap_t pmap;

	LIST_INIT(&freequeue);

	PV_PAGE_LOCK(m);
	LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) {
		pmap = pvo->pvo_pmap;
		PMAP_LOCK(pmap);
		wasdead = (pvo->pvo_vaddr & PVO_DEAD);
		if (!wasdead)
			moea64_pvo_remove_from_pmap(mmu, pvo);
		moea64_pvo_remove_from_page(mmu, pvo);
		if (!wasdead)
			LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink);
		PMAP_UNLOCK(pmap);
	}
	KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings"));
	KASSERT(!(m->aflags & PGA_WRITEABLE), ("Page still writable"));
	PV_PAGE_UNLOCK(m);

	/* Clean up UMA allocations */
	LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo)
		free_pvo_entry(pvo);
}

/*
 * Allocate a physical page of memory directly from the phys_avail map.
 * Can only be called from moea64_bootstrap before avail start and end are
 * calculated.
 */
vm_offset_t
moea64_bootstrap_alloc(vm_size_t size, u_int align)
{
	vm_offset_t s, e;
	int i, j;

	size = round_page(size);
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		if (align != 0)
			s = (phys_avail[i] + align - 1) & ~(align - 1);
		else
			s = phys_avail[i];
		e = s + size;

		if (s < phys_avail[i] || e > phys_avail[i + 1])
			continue;

		if (s + size > platform_real_maxaddr())
			continue;

		if (s == phys_avail[i]) {
			phys_avail[i] += size;
		} else if (e == phys_avail[i + 1]) {
			phys_avail[i + 1] -= size;
		} else {
			for (j = phys_avail_count * 2; j > i; j -= 2) {
				phys_avail[j] = phys_avail[j - 2];
				phys_avail[j + 1] = phys_avail[j - 1];
			}

			phys_avail[i + 3] = phys_avail[i + 1];
			phys_avail[i + 1] = s;
			phys_avail[i + 2] = e;
			phys_avail_count++;
		}

		return (s);
	}
	panic("moea64_bootstrap_alloc: could not allocate memory");
}

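/*
 * Worked example (not from the original source): assume the hypothetical
 * range phys_avail[i] = 0x3000000, phys_avail[i + 1] = 0x3800000 and a
 * request for size = 0x20000 with align = 0x100000.  Then
 *
 *	s = (0x3000000 + 0xfffff) & ~0xfffff = 0x3000000
 *	e = s + 0x20000                      = 0x3020000
 *
 * and because s equals phys_avail[i], that entry simply shrinks to
 * [0x3020000, 0x3800000).  Had the aligned start landed strictly inside the
 * range, the final else branch would instead split it into two entries.
 */
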
static int
moea64_pvo_enter(mmu_t mmu, struct pvo_entry *pvo, struct pvo_head *pvo_head)
{
	int first, err;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
	KASSERT(moea64_pvo_find_va(pvo->pvo_pmap, PVO_VADDR(pvo)) == NULL,
	    ("Existing mapping for VA %#jx", (uintmax_t)PVO_VADDR(pvo)));

	moea64_pvo_enter_calls++;

	/*
	 * Add to pmap list.
	 */
	RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);

	/*
	 * Remember if the list was empty and therefore will be the first
	 * item.
	 */
	first = 0;
	if (pvo_head != NULL) {
		if (LIST_FIRST(pvo_head) == NULL)
			first = 1;
		LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
	}

	if (pvo->pvo_vaddr & PVO_WIRED)
		pvo->pvo_pmap->pm_stats.wired_count++;
	pvo->pvo_pmap->pm_stats.resident_count++;

	/*
	 * Insert it into the hardware page table.
	 */
	err = MOEA64_PTE_INSERT(mmu, pvo);
	if (err != 0) {
		panic("moea64_pvo_enter: overflow");
	}

	moea64_pvo_entries++;

	if (pvo->pvo_pmap == kernel_pmap)
		isync();

#ifdef __powerpc64__
	/*
	 * Make sure all our bootstrap mappings are in the SLB as soon
	 * as virtual memory is switched on.
	 */
	if (!pmap_bootstrapped)
		moea64_bootstrap_slb_prefault(PVO_VADDR(pvo),
		    pvo->pvo_vaddr & PVO_LARGE);
#endif

	return (first ? ENOENT : 0);
}

static void
moea64_pvo_remove_from_pmap(mmu_t mmu, struct pvo_entry *pvo)
{
	struct vm_page *pg;
	int32_t refchg;

	KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap"));
	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
	KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO"));

	/*
	 * If there is an active pte entry, we need to deactivate it.
	 */
	refchg = MOEA64_PTE_UNSET(mmu, pvo);
	if (refchg < 0) {
		/*
		 * If it was evicted from the page table, be pessimistic and
		 * dirty the page.
		 */
		if (pvo->pvo_pte.prot & VM_PROT_WRITE)
			refchg = LPTE_CHG;
		else
			refchg = 0;
	}

	/*
	 * Update our statistics.
	 */
	pvo->pvo_pmap->pm_stats.resident_count--;
	if (pvo->pvo_vaddr & PVO_WIRED)
		pvo->pvo_pmap->pm_stats.wired_count--;

	/*
	 * Remove this PVO from the pmap list.
	 */
	RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);

	/*
	 * Mark this for the next sweep.
	 */
	pvo->pvo_vaddr |= PVO_DEAD;

	/* Send RC bits to VM */
	if ((pvo->pvo_vaddr & PVO_MANAGED) &&
	    (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
		pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
		if (pg != NULL) {
			refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
			if (refchg & LPTE_CHG)
				vm_page_dirty(pg);
			if (refchg & LPTE_REF)
				vm_page_aflag_set(pg, PGA_REFERENCED);
		}
	}
}

static void
moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo)
{
	struct vm_page *pg;

	KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page"));

	/* Use NULL pmaps as a sentinel for races in page deletion */
	if (pvo->pvo_pmap == NULL)
		return;
	pvo->pvo_pmap = NULL;

	/*
	 * Update vm about page writeability/executability if managed.
	 */
	PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN);
	pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);

	if ((pvo->pvo_vaddr & PVO_MANAGED) && pg != NULL) {
		LIST_REMOVE(pvo, pvo_vlink);
		if (LIST_EMPTY(vm_page_to_pvoh(pg)))
			vm_page_aflag_clear(pg, PGA_WRITEABLE | PGA_EXECUTABLE);
	}

	moea64_pvo_entries--;
	moea64_pvo_remove_calls++;
}

static struct pvo_entry *
moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
{
	struct pvo_entry key;

	PMAP_LOCK_ASSERT(pm, MA_OWNED);

	key.pvo_vaddr = va & ~ADDR_POFF;
	return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key));
}

static boolean_t
moea64_query_bit(mmu_t mmu, vm_page_t m, uint64_t ptebit)
{
	struct pvo_entry *pvo;
	int64_t ret;
	boolean_t rv;

	/*
	 * See if this bit is stored in the page already.
	 */
	if (m->md.mdpg_attrs & ptebit)
		return (TRUE);

	/*
	 * Examine each PTE.  Sync so that any pending REF/CHG bits are
	 * flushed to the PTEs.
	 */
	rv = FALSE;
	powerpc_sync();
	PV_PAGE_LOCK(m);
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
		ret = 0;

		/*
		 * See if this pvo has a valid PTE.  If so, fetch the
		 * REF/CHG bits from the valid PTE.  If the appropriate
		 * ptebit is set, return success.
		 */
		PMAP_LOCK(pvo->pvo_pmap);
		if (!(pvo->pvo_vaddr & PVO_DEAD))
			ret = MOEA64_PTE_SYNCH(mmu, pvo);
		PMAP_UNLOCK(pvo->pvo_pmap);

		if (ret > 0) {
			atomic_set_32(&m->md.mdpg_attrs,
			    ret & (LPTE_CHG | LPTE_REF));
			if (ret & ptebit) {
				rv = TRUE;
				break;
			}
		}
	}
	PV_PAGE_UNLOCK(m);

	return (rv);
}

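/*
 * Illustrative note (not from the original source): moea64_query_bit()
 * caches any REF/CHG bits it gathers in mdpg_attrs, so a repeated query for
 * the same bit can short-circuit without walking the PVO list again:
 *
 *	(void)moea64_query_bit(mmu, m, LPTE_CHG);	first call may sync PTEs
 *	(void)moea64_query_bit(mmu, m, LPTE_CHG);	second call can return
 *							TRUE from cached attrs
 *
 * moea64_clear_bit() below clears both the cached copy and the PTEs.
 */
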
static u_int
moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit)
{
	u_int count;
	struct pvo_entry *pvo;
	int64_t ret;

	/*
	 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
	 * we can reset the right ones).
	 */
	powerpc_sync();

	/*
	 * For each pvo entry, clear the pte's ptebit.
	 */
	count = 0;
	PV_PAGE_LOCK(m);
	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
		ret = 0;

		PMAP_LOCK(pvo->pvo_pmap);
		if (!(pvo->pvo_vaddr & PVO_DEAD))
			ret = MOEA64_PTE_CLEAR(mmu, pvo, ptebit);
		PMAP_UNLOCK(pvo->pvo_pmap);

		if (ret > 0 && (ret & ptebit))
			count++;
	}
	atomic_clear_32(&m->md.mdpg_attrs, ptebit);
	PV_PAGE_UNLOCK(m);

	return (count);
}

boolean_t
moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{
	struct pvo_entry *pvo, key;
	vm_offset_t ppa;
	int error = 0;

	PMAP_LOCK(kernel_pmap);
	key.pvo_vaddr = ppa = pa & ~ADDR_POFF;
	for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key);
	    ppa < pa + size; ppa += PAGE_SIZE,
	    pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) {
		if (pvo == NULL || (pvo->pvo_pte.pa & LPTE_RPGN) != ppa) {
			error = EFAULT;
			break;
		}
	}
	PMAP_UNLOCK(kernel_pmap);

	return (error);
}

/*
 * Map a set of physical memory pages into the kernel virtual address space.
 * Return a pointer to where it is mapped.  This routine is intended to be
 * used for mapping device memory, NOT real memory.
 */
void *
moea64_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma)
{
	vm_offset_t va, tmpva, ppa, offset;

	ppa = trunc_page(pa);
	offset = pa & PAGE_MASK;
	size = roundup2(offset + size, PAGE_SIZE);

	va = kva_alloc(size);

	if (!va)
		panic("moea64_mapdev: Couldn't alloc kernel virtual memory");

	for (tmpva = va; size > 0;) {
		moea64_kenter_attr(mmu, tmpva, ppa, ma);
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		ppa += PAGE_SIZE;
	}

	return ((void *)(va + offset));
}

void *
moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
{

	return (moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT));
}

void
moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
{
	vm_offset_t base, offset;

	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = roundup2(offset + size, PAGE_SIZE);

	kva_free(base, size);
}

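/*
 * Illustrative sketch (not from the original source): device mappings made
 * with moea64_mapdev_attr() are torn down with moea64_unmapdev() using the
 * same size, e.g. for a hypothetical register window at 'regs_pa':
 *
 *	void *regs = moea64_mapdev_attr(mmu, regs_pa, regs_size,
 *	    VM_MEMATTR_UNCACHEABLE);
 *	... access the registers ...
 *	moea64_unmapdev(mmu, (vm_offset_t)regs, regs_size);
 *
 * Both routines round the request out to whole pages, so a sub-page offset
 * within 'regs_pa' is preserved in the returned pointer.
 */
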
void
moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
{
	struct pvo_entry *pvo;
	vm_offset_t lim;
	vm_paddr_t pa;
	vm_size_t len;

	PMAP_LOCK(pm);
	while (sz > 0) {
		lim = round_page(va);
		len = MIN(lim - va, sz);
		pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
		if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) {
			pa = (pvo->pvo_pte.pa & LPTE_RPGN) | (va & ADDR_POFF);
			moea64_syncicache(mmu, pm, va, pa, len);
		}
		va += len;
		sz -= len;
	}
	PMAP_UNLOCK(pm);
}

void
moea64_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va)
{

	*va = (void *)pa;
}

extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];

void
moea64_scan_init(mmu_t mmu)
{
	struct pvo_entry *pvo;
	vm_offset_t va;
	int i;

	if (!do_minidump) {
		/* Initialize phys. segments for dumpsys(). */
		memset(&dump_map, 0, sizeof(dump_map));
		mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
		for (i = 0; i < pregions_sz; i++) {
			dump_map[i].pa_start = pregions[i].mr_start;
			dump_map[i].pa_size = pregions[i].mr_size;
		}
		return;
	}

	/* Virtual segments for minidumps: */
	memset(&dump_map, 0, sizeof(dump_map));

	/* 1st: kernel .data and .bss. */
	dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
	dump_map[0].pa_size = round_page((uintptr_t)_end) -
	    dump_map[0].pa_start;

	/* 2nd: msgbuf and tables (see pmap_bootstrap()). */
	dump_map[1].pa_start = (vm_paddr_t)msgbufp->msg_ptr;
	dump_map[1].pa_size = round_page(msgbufp->msg_size);

	/* 3rd: kernel VM. */
	va = dump_map[1].pa_start + dump_map[1].pa_size;
	/* Find start of next chunk (from va). */
	while (va < virtual_end) {
		/* Don't dump the buffer cache. */
		if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
			va = kmi.buffer_eva;
			continue;
		}
		pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
		if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD))
			break;
		va += PAGE_SIZE;
	}
	if (va < virtual_end) {
		dump_map[2].pa_start = va;
		va += PAGE_SIZE;
		/* Find last page in chunk. */
		while (va < virtual_end) {
			/* Don't run into the buffer cache. */
			if (va == kmi.buffer_sva)
				break;
			pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
			if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD))
				break;
			va += PAGE_SIZE;
		}
		dump_map[2].pa_size = va - dump_map[2].pa_start;
	}
}