1 /*- 2 * Copyright (c) 2001 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to The NetBSD Foundation 6 * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the NetBSD 19 * Foundation, Inc. and its contributors. 20 * 4. Neither the name of The NetBSD Foundation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (C) 1995, 1996 Wolfgang Solfrank. 38 * Copyright (C) 1995, 1996 TooLs GmbH. 39 * All rights reserved. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. All advertising materials mentioning features or use of this software 50 * must display the following acknowledgement: 51 * This product includes software developed by TooLs GmbH. 52 * 4. The name of TooLs GmbH may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
58 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 59 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 61 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 62 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 63 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 64 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 * 66 * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ 67 */ 68 /*- 69 * Copyright (C) 2001 Benno Rice. 70 * All rights reserved. 71 * 72 * Redistribution and use in source and binary forms, with or without 73 * modification, are permitted provided that the following conditions 74 * are met: 75 * 1. Redistributions of source code must retain the above copyright 76 * notice, this list of conditions and the following disclaimer. 77 * 2. Redistributions in binary form must reproduce the above copyright 78 * notice, this list of conditions and the following disclaimer in the 79 * documentation and/or other materials provided with the distribution. 80 * 81 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR 82 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 83 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 84 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 85 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 86 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 87 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 88 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 89 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 90 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 91 */ 92 93 #include <sys/cdefs.h> 94 __FBSDID("$FreeBSD$"); 95 96 /* 97 * Manages physical address maps. 98 * 99 * In addition to hardware address maps, this module is called upon to 100 * provide software-use-only maps which may or may not be stored in the 101 * same form as hardware maps. These pseudo-maps are used to store 102 * intermediate results from copy operations to and from address spaces. 103 * 104 * Since the information managed by this module is also stored by the 105 * logical address mapping module, this module may throw away valid virtual 106 * to physical mappings at almost any time. However, invalidations of 107 * mappings must be done as requested. 108 * 109 * In order to cope with hardware architectures which make virtual to 110 * physical map invalidates expensive, this module may delay invalidate 111 * reduced protection operations until such time as they are actually 112 * necessary. This module is given full information as to which processors 113 * are currently using which maps, and to when physical maps must be made 114 * correct. 
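 *
 * On 64-bit PowerPC (OEA64), the hardware map is a single hashed page
 * table shared by all pmaps; the PVO lists maintained by this module are
 * the authoritative record of which mappings currently exist.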
115 */ 116 117 #include "opt_compat.h" 118 #include "opt_kstack_pages.h" 119 120 #include <sys/param.h> 121 #include <sys/kernel.h> 122 #include <sys/queue.h> 123 #include <sys/cpuset.h> 124 #include <sys/ktr.h> 125 #include <sys/lock.h> 126 #include <sys/msgbuf.h> 127 #include <sys/mutex.h> 128 #include <sys/proc.h> 129 #include <sys/sched.h> 130 #include <sys/sysctl.h> 131 #include <sys/systm.h> 132 #include <sys/vmmeter.h> 133 134 #include <sys/kdb.h> 135 136 #include <dev/ofw/openfirm.h> 137 138 #include <vm/vm.h> 139 #include <vm/vm_param.h> 140 #include <vm/vm_kern.h> 141 #include <vm/vm_page.h> 142 #include <vm/vm_map.h> 143 #include <vm/vm_object.h> 144 #include <vm/vm_extern.h> 145 #include <vm/vm_pageout.h> 146 #include <vm/vm_pager.h> 147 #include <vm/uma.h> 148 149 #include <machine/_inttypes.h> 150 #include <machine/cpu.h> 151 #include <machine/platform.h> 152 #include <machine/frame.h> 153 #include <machine/md_var.h> 154 #include <machine/psl.h> 155 #include <machine/bat.h> 156 #include <machine/hid.h> 157 #include <machine/pte.h> 158 #include <machine/sr.h> 159 #include <machine/trap.h> 160 #include <machine/mmuvar.h> 161 162 #include "mmu_oea64.h" 163 #include "mmu_if.h" 164 #include "moea64_if.h" 165 166 void moea64_release_vsid(uint64_t vsid); 167 uintptr_t moea64_get_unique_vsid(void); 168 169 #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) 170 #define ENABLE_TRANS(msr) mtmsr(msr) 171 172 #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) 173 #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) 174 #define VSID_HASH_MASK 0x0000007fffffffffULL 175 176 #define LOCK_TABLE() mtx_lock(&moea64_table_mutex) 177 #define UNLOCK_TABLE() mtx_unlock(&moea64_table_mutex); 178 #define ASSERT_TABLE_LOCK() mtx_assert(&moea64_table_mutex, MA_OWNED) 179 180 struct ofw_map { 181 cell_t om_va; 182 cell_t om_len; 183 cell_t om_pa_hi; 184 cell_t om_pa_lo; 185 cell_t om_mode; 186 }; 187 188 /* 189 * Map of physical memory regions. 190 */ 191 static struct mem_region *regions; 192 static struct mem_region *pregions; 193 static u_int phys_avail_count; 194 static int regions_sz, pregions_sz; 195 196 extern void bs_remap_earlyboot(void); 197 198 /* 199 * Lock for the pteg and pvo tables. 200 */ 201 struct mtx moea64_table_mutex; 202 struct mtx moea64_slb_mutex; 203 204 /* 205 * PTEG data. 206 */ 207 u_int moea64_pteg_count; 208 u_int moea64_pteg_mask; 209 210 /* 211 * PVO data. 212 */ 213 struct pvo_head *moea64_pvo_table; /* pvo entries by pteg index */ 214 struct pvo_head moea64_pvo_kunmanaged = /* list of unmanaged pages */ 215 LIST_HEAD_INITIALIZER(moea64_pvo_kunmanaged); 216 217 uma_zone_t moea64_upvo_zone; /* zone for pvo entries for unmanaged pages */ 218 uma_zone_t moea64_mpvo_zone; /* zone for pvo entries for managed pages */ 219 220 #define BPVO_POOL_SIZE 327680 221 static struct pvo_entry *moea64_bpvo_pool; 222 static int moea64_bpvo_pool_index = 0; 223 224 #define VSID_NBPW (sizeof(u_int32_t) * 8) 225 #ifdef __powerpc64__ 226 #define NVSIDS (NPMAPS * 16) 227 #define VSID_HASHMASK 0xffffffffUL 228 #else 229 #define NVSIDS NPMAPS 230 #define VSID_HASHMASK 0xfffffUL 231 #endif 232 static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; 233 234 static boolean_t moea64_initialized = FALSE; 235 236 /* 237 * Statistics. 
238 */ 239 u_int moea64_pte_valid = 0; 240 u_int moea64_pte_overflow = 0; 241 u_int moea64_pvo_entries = 0; 242 u_int moea64_pvo_enter_calls = 0; 243 u_int moea64_pvo_remove_calls = 0; 244 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, 245 &moea64_pte_valid, 0, ""); 246 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, 247 &moea64_pte_overflow, 0, ""); 248 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, 249 &moea64_pvo_entries, 0, ""); 250 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, 251 &moea64_pvo_enter_calls, 0, ""); 252 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, 253 &moea64_pvo_remove_calls, 0, ""); 254 255 vm_offset_t moea64_scratchpage_va[2]; 256 struct pvo_entry *moea64_scratchpage_pvo[2]; 257 uintptr_t moea64_scratchpage_pte[2]; 258 struct mtx moea64_scratchpage_mtx; 259 260 uint64_t moea64_large_page_mask = 0; 261 int moea64_large_page_size = 0; 262 int moea64_large_page_shift = 0; 263 264 /* 265 * PVO calls. 266 */ 267 static int moea64_pvo_enter(mmu_t, pmap_t, uma_zone_t, struct pvo_head *, 268 vm_offset_t, vm_offset_t, uint64_t, int); 269 static void moea64_pvo_remove(mmu_t, struct pvo_entry *); 270 static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); 271 272 /* 273 * Utility routines. 274 */ 275 static void moea64_enter_locked(mmu_t, pmap_t, vm_offset_t, 276 vm_page_t, vm_prot_t, boolean_t); 277 static boolean_t moea64_query_bit(mmu_t, vm_page_t, u_int64_t); 278 static u_int moea64_clear_bit(mmu_t, vm_page_t, u_int64_t); 279 static void moea64_kremove(mmu_t, vm_offset_t); 280 static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, 281 vm_offset_t pa, vm_size_t sz); 282 283 /* 284 * Kernel MMU interface 285 */ 286 void moea64_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); 287 void moea64_clear_modify(mmu_t, vm_page_t); 288 void moea64_clear_reference(mmu_t, vm_page_t); 289 void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); 290 void moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); 291 void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, 292 vm_prot_t); 293 void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); 294 vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); 295 vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); 296 void moea64_init(mmu_t); 297 boolean_t moea64_is_modified(mmu_t, vm_page_t); 298 boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); 299 boolean_t moea64_is_referenced(mmu_t, vm_page_t); 300 boolean_t moea64_ts_referenced(mmu_t, vm_page_t); 301 vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, int); 302 boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); 303 int moea64_page_wired_mappings(mmu_t, vm_page_t); 304 void moea64_pinit(mmu_t, pmap_t); 305 void moea64_pinit0(mmu_t, pmap_t); 306 void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); 307 void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); 308 void moea64_qremove(mmu_t, vm_offset_t, int); 309 void moea64_release(mmu_t, pmap_t); 310 void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); 311 void moea64_remove_all(mmu_t, vm_page_t); 312 void moea64_remove_write(mmu_t, vm_page_t); 313 void moea64_zero_page(mmu_t, vm_page_t); 314 void moea64_zero_page_area(mmu_t, vm_page_t, int, int); 315 void moea64_zero_page_idle(mmu_t, vm_page_t); 316 void moea64_activate(mmu_t, struct thread *); 317 void moea64_deactivate(mmu_t, struct 
thread *); 318 void *moea64_mapdev(mmu_t, vm_offset_t, vm_size_t); 319 void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t); 320 void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); 321 vm_offset_t moea64_kextract(mmu_t, vm_offset_t); 322 void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); 323 void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma); 324 void moea64_kenter(mmu_t, vm_offset_t, vm_offset_t); 325 boolean_t moea64_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t); 326 static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); 327 328 static mmu_method_t moea64_methods[] = { 329 MMUMETHOD(mmu_change_wiring, moea64_change_wiring), 330 MMUMETHOD(mmu_clear_modify, moea64_clear_modify), 331 MMUMETHOD(mmu_clear_reference, moea64_clear_reference), 332 MMUMETHOD(mmu_copy_page, moea64_copy_page), 333 MMUMETHOD(mmu_enter, moea64_enter), 334 MMUMETHOD(mmu_enter_object, moea64_enter_object), 335 MMUMETHOD(mmu_enter_quick, moea64_enter_quick), 336 MMUMETHOD(mmu_extract, moea64_extract), 337 MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), 338 MMUMETHOD(mmu_init, moea64_init), 339 MMUMETHOD(mmu_is_modified, moea64_is_modified), 340 MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), 341 MMUMETHOD(mmu_is_referenced, moea64_is_referenced), 342 MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), 343 MMUMETHOD(mmu_map, moea64_map), 344 MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), 345 MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), 346 MMUMETHOD(mmu_pinit, moea64_pinit), 347 MMUMETHOD(mmu_pinit0, moea64_pinit0), 348 MMUMETHOD(mmu_protect, moea64_protect), 349 MMUMETHOD(mmu_qenter, moea64_qenter), 350 MMUMETHOD(mmu_qremove, moea64_qremove), 351 MMUMETHOD(mmu_release, moea64_release), 352 MMUMETHOD(mmu_remove, moea64_remove), 353 MMUMETHOD(mmu_remove_all, moea64_remove_all), 354 MMUMETHOD(mmu_remove_write, moea64_remove_write), 355 MMUMETHOD(mmu_sync_icache, moea64_sync_icache), 356 MMUMETHOD(mmu_zero_page, moea64_zero_page), 357 MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), 358 MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), 359 MMUMETHOD(mmu_activate, moea64_activate), 360 MMUMETHOD(mmu_deactivate, moea64_deactivate), 361 MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), 362 363 /* Internal interfaces */ 364 MMUMETHOD(mmu_mapdev, moea64_mapdev), 365 MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), 366 MMUMETHOD(mmu_unmapdev, moea64_unmapdev), 367 MMUMETHOD(mmu_kextract, moea64_kextract), 368 MMUMETHOD(mmu_kenter, moea64_kenter), 369 MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), 370 MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), 371 372 { 0, 0 } 373 }; 374 375 MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); 376 377 static __inline u_int 378 va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) 379 { 380 uint64_t hash; 381 int shift; 382 383 shift = large ? 
moea64_large_page_shift : ADDR_PIDX_SHFT; 384 hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> 385 shift); 386 return (hash & moea64_pteg_mask); 387 } 388 389 static __inline struct pvo_head * 390 vm_page_to_pvoh(vm_page_t m) 391 { 392 393 return (&m->md.mdpg_pvoh); 394 } 395 396 static __inline void 397 moea64_attr_clear(vm_page_t m, u_int64_t ptebit) 398 { 399 400 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 401 m->md.mdpg_attrs &= ~ptebit; 402 } 403 404 static __inline u_int64_t 405 moea64_attr_fetch(vm_page_t m) 406 { 407 408 return (m->md.mdpg_attrs); 409 } 410 411 static __inline void 412 moea64_attr_save(vm_page_t m, u_int64_t ptebit) 413 { 414 415 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 416 m->md.mdpg_attrs |= ptebit; 417 } 418 419 static __inline void 420 moea64_pte_create(struct lpte *pt, uint64_t vsid, vm_offset_t va, 421 uint64_t pte_lo, int flags) 422 { 423 424 ASSERT_TABLE_LOCK(); 425 426 /* 427 * Construct a PTE. Default to IMB initially. Valid bit only gets 428 * set when the real pte is set in memory. 429 * 430 * Note: Don't set the valid bit for correct operation of tlb update. 431 */ 432 pt->pte_hi = (vsid << LPTE_VSID_SHIFT) | 433 (((uint64_t)(va & ADDR_PIDX) >> ADDR_API_SHFT64) & LPTE_API); 434 435 if (flags & PVO_LARGE) 436 pt->pte_hi |= LPTE_BIG; 437 438 pt->pte_lo = pte_lo; 439 } 440 441 static __inline uint64_t 442 moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma) 443 { 444 uint64_t pte_lo; 445 int i; 446 447 if (ma != VM_MEMATTR_DEFAULT) { 448 switch (ma) { 449 case VM_MEMATTR_UNCACHEABLE: 450 return (LPTE_I | LPTE_G); 451 case VM_MEMATTR_WRITE_COMBINING: 452 case VM_MEMATTR_WRITE_BACK: 453 case VM_MEMATTR_PREFETCHABLE: 454 return (LPTE_I); 455 case VM_MEMATTR_WRITE_THROUGH: 456 return (LPTE_W | LPTE_M); 457 } 458 } 459 460 /* 461 * Assume the page is cache inhibited and access is guarded unless 462 * it's in our available memory array. 463 */ 464 pte_lo = LPTE_I | LPTE_G; 465 for (i = 0; i < pregions_sz; i++) { 466 if ((pa >= pregions[i].mr_start) && 467 (pa < (pregions[i].mr_start + pregions[i].mr_size))) { 468 pte_lo &= ~(LPTE_I | LPTE_G); 469 pte_lo |= LPTE_M; 470 break; 471 } 472 } 473 474 return pte_lo; 475 } 476 477 /* 478 * Quick sort callout for comparing memory regions. 
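 * Entries are ordered by physical address, comparing the high word
 * (om_pa_hi) first and the low word (om_pa_lo) second.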
479 */ 480 static int om_cmp(const void *a, const void *b); 481 482 static int 483 om_cmp(const void *a, const void *b) 484 { 485 const struct ofw_map *mapa; 486 const struct ofw_map *mapb; 487 488 mapa = a; 489 mapb = b; 490 if (mapa->om_pa_hi < mapb->om_pa_hi) 491 return (-1); 492 else if (mapa->om_pa_hi > mapb->om_pa_hi) 493 return (1); 494 else if (mapa->om_pa_lo < mapb->om_pa_lo) 495 return (-1); 496 else if (mapa->om_pa_lo > mapb->om_pa_lo) 497 return (1); 498 else 499 return (0); 500 } 501 502 static void 503 moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) 504 { 505 struct ofw_map translations[sz/sizeof(struct ofw_map)]; 506 register_t msr; 507 vm_offset_t off; 508 vm_paddr_t pa_base; 509 int i; 510 511 bzero(translations, sz); 512 if (OF_getprop(mmu, "translations", translations, sz) == -1) 513 panic("moea64_bootstrap: can't get ofw translations"); 514 515 CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); 516 sz /= sizeof(*translations); 517 qsort(translations, sz, sizeof (*translations), om_cmp); 518 519 for (i = 0; i < sz; i++) { 520 CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x", 521 (uint32_t)(translations[i].om_pa_lo), translations[i].om_va, 522 translations[i].om_len); 523 524 if (translations[i].om_pa_lo % PAGE_SIZE) 525 panic("OFW translation not page-aligned!"); 526 527 pa_base = translations[i].om_pa_lo; 528 529 #ifdef __powerpc64__ 530 pa_base += (vm_offset_t)translations[i].om_pa_hi << 32; 531 #else 532 if (translations[i].om_pa_hi) 533 panic("OFW translations above 32-bit boundary!"); 534 #endif 535 536 /* Now enter the pages for this mapping */ 537 538 DISABLE_TRANS(msr); 539 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { 540 if (moea64_pvo_find_va(kernel_pmap, 541 translations[i].om_va + off) != NULL) 542 continue; 543 544 moea64_kenter(mmup, translations[i].om_va + off, 545 pa_base + off); 546 } 547 ENABLE_TRANS(msr); 548 } 549 } 550 551 #ifdef __powerpc64__ 552 static void 553 moea64_probe_large_page(void) 554 { 555 uint16_t pvr = mfpvr() >> 16; 556 557 switch (pvr) { 558 case IBM970: 559 case IBM970FX: 560 case IBM970MP: 561 powerpc_sync(); isync(); 562 mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); 563 powerpc_sync(); isync(); 564 565 /* FALLTHROUGH */ 566 case IBMCELLBE: 567 moea64_large_page_size = 0x1000000; /* 16 MB */ 568 moea64_large_page_shift = 24; 569 break; 570 default: 571 moea64_large_page_size = 0; 572 } 573 574 moea64_large_page_mask = moea64_large_page_size - 1; 575 } 576 577 static void 578 moea64_bootstrap_slb_prefault(vm_offset_t va, int large) 579 { 580 struct slb *cache; 581 struct slb entry; 582 uint64_t esid, slbe; 583 uint64_t i; 584 585 cache = PCPU_GET(slb); 586 esid = va >> ADDR_SR_SHFT; 587 slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; 588 589 for (i = 0; i < 64; i++) { 590 if (cache[i].slbe == (slbe | i)) 591 return; 592 } 593 594 entry.slbe = slbe; 595 entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; 596 if (large) 597 entry.slbv |= SLBV_L; 598 599 slb_insert_kernel(entry.slbe, entry.slbv); 600 } 601 #endif 602 603 static void 604 moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, 605 vm_offset_t kernelend) 606 { 607 register_t msr; 608 vm_paddr_t pa; 609 vm_offset_t size, off; 610 uint64_t pte_lo; 611 int i; 612 613 if (moea64_large_page_size == 0) 614 hw_direct_map = 0; 615 616 DISABLE_TRANS(msr); 617 if (hw_direct_map) { 618 PMAP_LOCK(kernel_pmap); 619 for (i = 0; i < pregions_sz; i++) { 620 for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + 621 
                    pregions[i].mr_size; pa += moea64_large_page_size) {
                        pte_lo = LPTE_M;

                        /*
                         * Set memory access as guarded if a prefetch within
                         * the page could run past the end of the available
                         * physmem area.
                         */
                        if (pa & moea64_large_page_mask) {
                                pa &= ~moea64_large_page_mask;
                                pte_lo |= LPTE_G;
                        }
                        if (pa + moea64_large_page_size >
                            pregions[i].mr_start + pregions[i].mr_size)
                                pte_lo |= LPTE_G;

                        moea64_pvo_enter(mmup, kernel_pmap, moea64_upvo_zone,
                            &moea64_pvo_kunmanaged, pa, pa,
                            pte_lo, PVO_WIRED | PVO_LARGE);
                    }
                }
                PMAP_UNLOCK(kernel_pmap);
        } else {
                size = sizeof(struct pvo_head) * moea64_pteg_count;
                off = (vm_offset_t)(moea64_pvo_table);
                for (pa = off; pa < off + size; pa += PAGE_SIZE)
                        moea64_kenter(mmup, pa, pa);
                size = BPVO_POOL_SIZE*sizeof(struct pvo_entry);
                off = (vm_offset_t)(moea64_bpvo_pool);
                for (pa = off; pa < off + size; pa += PAGE_SIZE)
                        moea64_kenter(mmup, pa, pa);

                /*
                 * Map certain important things, like ourselves.
                 *
                 * NOTE: We do not map the exception vector space. That code is
                 * used only in real mode, and leaving it unmapped allows us to
                 * catch NULL pointer dereferences, instead of making NULL a
                 * valid address.
                 */

                for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
                    pa += PAGE_SIZE)
                        moea64_kenter(mmup, pa, pa);
        }
        ENABLE_TRANS(msr);
}

void
moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
{
        int i, j;
        vm_size_t physsz, hwphyssz;

#ifndef __powerpc64__
        /* We don't have a direct map since there is no BAT */
        hw_direct_map = 0;

        /* Make sure battable is zero, since we have no BAT */
        for (i = 0; i < 16; i++) {
                battable[i].batu = 0;
                battable[i].batl = 0;
        }
#else
        moea64_probe_large_page();

        /* Use a direct map if we have large page support */
        if (moea64_large_page_size > 0)
                hw_direct_map = 1;
        else
                hw_direct_map = 0;
#endif

        /* Get physical memory regions from firmware */
        mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
        CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");

        if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz)
                panic("moea64_bootstrap: phys_avail too small");

        phys_avail_count = 0;
        physsz = 0;
        hwphyssz = 0;
        TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
        for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
                CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start,
                    regions[i].mr_start + regions[i].mr_size,
                    regions[i].mr_size);
                if (hwphyssz != 0 &&
                    (physsz + regions[i].mr_size) >= hwphyssz) {
                        if (physsz < hwphyssz) {
                                phys_avail[j] = regions[i].mr_start;
                                phys_avail[j + 1] = regions[i].mr_start +
                                    hwphyssz - physsz;
                                physsz = hwphyssz;
                                phys_avail_count++;
                        }
                        break;
                }
                phys_avail[j] = regions[i].mr_start;
                phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
                phys_avail_count++;
                physsz += regions[i].mr_size;
        }

        /* Check for overlap with the kernel and exception vectors */
        for (j = 0; j < 2*phys_avail_count; j+=2) {
                if (phys_avail[j] < EXC_LAST)
                        phys_avail[j] += EXC_LAST;

                if (kernelstart >= phys_avail[j] &&
                    kernelstart < phys_avail[j+1]) {
                        if (kernelend < phys_avail[j+1]) {
                                phys_avail[2*phys_avail_count] =
                                    (kernelend & ~PAGE_MASK) + PAGE_SIZE;
                                phys_avail[2*phys_avail_count + 1] =
                                    phys_avail[j+1];
phys_avail_count++; 738 } 739 740 phys_avail[j+1] = kernelstart & ~PAGE_MASK; 741 } 742 743 if (kernelend >= phys_avail[j] && 744 kernelend < phys_avail[j+1]) { 745 if (kernelstart > phys_avail[j]) { 746 phys_avail[2*phys_avail_count] = phys_avail[j]; 747 phys_avail[2*phys_avail_count + 1] = 748 kernelstart & ~PAGE_MASK; 749 phys_avail_count++; 750 } 751 752 phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; 753 } 754 } 755 756 physmem = btoc(physsz); 757 758 #ifdef PTEGCOUNT 759 moea64_pteg_count = PTEGCOUNT; 760 #else 761 moea64_pteg_count = 0x1000; 762 763 while (moea64_pteg_count < physmem) 764 moea64_pteg_count <<= 1; 765 766 moea64_pteg_count >>= 1; 767 #endif /* PTEGCOUNT */ 768 } 769 770 void 771 moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 772 { 773 vm_size_t size; 774 register_t msr; 775 int i; 776 777 /* 778 * Set PTEG mask 779 */ 780 moea64_pteg_mask = moea64_pteg_count - 1; 781 782 /* 783 * Allocate pv/overflow lists. 784 */ 785 size = sizeof(struct pvo_head) * moea64_pteg_count; 786 787 moea64_pvo_table = (struct pvo_head *)moea64_bootstrap_alloc(size, 788 PAGE_SIZE); 789 CTR1(KTR_PMAP, "moea64_bootstrap: PVO table at %p", moea64_pvo_table); 790 791 DISABLE_TRANS(msr); 792 for (i = 0; i < moea64_pteg_count; i++) 793 LIST_INIT(&moea64_pvo_table[i]); 794 ENABLE_TRANS(msr); 795 796 /* 797 * Initialize the lock that synchronizes access to the pteg and pvo 798 * tables. 799 */ 800 mtx_init(&moea64_table_mutex, "pmap table", NULL, MTX_DEF | 801 MTX_RECURSE); 802 mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); 803 804 /* 805 * Initialise the unmanaged pvo pool. 806 */ 807 moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( 808 BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0); 809 moea64_bpvo_pool_index = 0; 810 811 /* 812 * Make sure kernel vsid is allocated as well as VSID 0. 813 */ 814 #ifndef __powerpc64__ 815 moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] 816 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); 817 moea64_vsid_bitmap[0] |= 1; 818 #endif 819 820 /* 821 * Initialize the kernel pmap (which is statically allocated). 822 */ 823 #ifdef __powerpc64__ 824 for (i = 0; i < 64; i++) { 825 pcpup->pc_slb[i].slbv = 0; 826 pcpup->pc_slb[i].slbe = 0; 827 } 828 #else 829 for (i = 0; i < 16; i++) 830 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; 831 #endif 832 833 kernel_pmap->pmap_phys = kernel_pmap; 834 CPU_FILL(&kernel_pmap->pm_active); 835 LIST_INIT(&kernel_pmap->pmap_pvo); 836 837 PMAP_LOCK_INIT(kernel_pmap); 838 839 /* 840 * Now map in all the other buffers we allocated earlier 841 */ 842 843 moea64_setup_direct_map(mmup, kernelstart, kernelend); 844 } 845 846 void 847 moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) 848 { 849 ihandle_t mmui; 850 phandle_t chosen; 851 phandle_t mmu; 852 size_t sz; 853 int i; 854 vm_offset_t pa, va; 855 void *dpcpu; 856 857 /* 858 * Set up the Open Firmware pmap and add its mappings if not in real 859 * mode. 860 */ 861 862 chosen = OF_finddevice("/chosen"); 863 if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) { 864 mmu = OF_instance_to_package(mmui); 865 if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1) 866 sz = 0; 867 if (sz > 6144 /* tmpstksz - 2 KB headroom */) 868 panic("moea64_bootstrap: too many ofw translations"); 869 870 if (sz > 0) 871 moea64_add_ofw_mappings(mmup, mmu, sz); 872 } 873 874 /* 875 * Calculate the last available physical address. 
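 * phys_avail[] is a list of (start, end) pairs terminated by a zero entry,
 * so walk to the last pair and take its end address.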
876 */ 877 for (i = 0; phys_avail[i + 2] != 0; i += 2) 878 ; 879 Maxmem = powerpc_btop(phys_avail[i + 1]); 880 881 /* 882 * Initialize MMU and remap early physical mappings 883 */ 884 MMU_CPU_BOOTSTRAP(mmup,0); 885 mtmsr(mfmsr() | PSL_DR | PSL_IR); 886 pmap_bootstrapped++; 887 bs_remap_earlyboot(); 888 889 /* 890 * Set the start and end of kva. 891 */ 892 virtual_avail = VM_MIN_KERNEL_ADDRESS; 893 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; 894 895 /* 896 * Map the entire KVA range into the SLB. We must not fault there. 897 */ 898 #ifdef __powerpc64__ 899 for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) 900 moea64_bootstrap_slb_prefault(va, 0); 901 #endif 902 903 /* 904 * Figure out how far we can extend virtual_end into segment 16 905 * without running into existing mappings. Segment 16 is guaranteed 906 * to contain neither RAM nor devices (at least on Apple hardware), 907 * but will generally contain some OFW mappings we should not 908 * step on. 909 */ 910 911 #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ 912 PMAP_LOCK(kernel_pmap); 913 while (virtual_end < VM_MAX_KERNEL_ADDRESS && 914 moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) 915 virtual_end += PAGE_SIZE; 916 PMAP_UNLOCK(kernel_pmap); 917 #endif 918 919 /* 920 * Allocate a kernel stack with a guard page for thread0 and map it 921 * into the kernel page map. 922 */ 923 pa = moea64_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); 924 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; 925 virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; 926 CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); 927 thread0.td_kstack = va; 928 thread0.td_kstack_pages = KSTACK_PAGES; 929 for (i = 0; i < KSTACK_PAGES; i++) { 930 moea64_kenter(mmup, va, pa); 931 pa += PAGE_SIZE; 932 va += PAGE_SIZE; 933 } 934 935 /* 936 * Allocate virtual address space for the message buffer. 937 */ 938 pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); 939 msgbufp = (struct msgbuf *)virtual_avail; 940 va = virtual_avail; 941 virtual_avail += round_page(msgbufsize); 942 while (va < virtual_avail) { 943 moea64_kenter(mmup, va, pa); 944 pa += PAGE_SIZE; 945 va += PAGE_SIZE; 946 } 947 948 /* 949 * Allocate virtual address space for the dynamic percpu area. 950 */ 951 pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); 952 dpcpu = (void *)virtual_avail; 953 va = virtual_avail; 954 virtual_avail += DPCPU_SIZE; 955 while (va < virtual_avail) { 956 moea64_kenter(mmup, va, pa); 957 pa += PAGE_SIZE; 958 va += PAGE_SIZE; 959 } 960 dpcpu_init(dpcpu, 0); 961 962 /* 963 * Allocate some things for page zeroing. We put this directly 964 * in the page table, marked with LPTE_LOCKED, to avoid any 965 * of the PVO book-keeping or other parts of the VM system 966 * from even knowing that this hack exists. 
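 *
 * Two scratch slots are carved off the top of KVA below; their PTEs are
 * later retargeted by moea64_set_scratchpage_pa() at whatever physical
 * page needs to be zeroed, copied or synced when there is no direct map.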
967 */ 968 969 if (!hw_direct_map) { 970 mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, 971 MTX_DEF); 972 for (i = 0; i < 2; i++) { 973 moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; 974 virtual_end -= PAGE_SIZE; 975 976 moea64_kenter(mmup, moea64_scratchpage_va[i], 0); 977 978 moea64_scratchpage_pvo[i] = moea64_pvo_find_va( 979 kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); 980 LOCK_TABLE(); 981 moea64_scratchpage_pte[i] = MOEA64_PVO_TO_PTE( 982 mmup, moea64_scratchpage_pvo[i]); 983 moea64_scratchpage_pvo[i]->pvo_pte.lpte.pte_hi 984 |= LPTE_LOCKED; 985 MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[i], 986 &moea64_scratchpage_pvo[i]->pvo_pte.lpte, 987 moea64_scratchpage_pvo[i]->pvo_vpn); 988 UNLOCK_TABLE(); 989 } 990 } 991 } 992 993 /* 994 * Activate a user pmap. The pmap must be activated before its address 995 * space can be accessed in any way. 996 */ 997 void 998 moea64_activate(mmu_t mmu, struct thread *td) 999 { 1000 pmap_t pm; 1001 1002 pm = &td->td_proc->p_vmspace->vm_pmap; 1003 CPU_SET(PCPU_GET(cpuid), &pm->pm_active); 1004 1005 #ifdef __powerpc64__ 1006 PCPU_SET(userslb, pm->pm_slb); 1007 #else 1008 PCPU_SET(curpmap, pm->pmap_phys); 1009 #endif 1010 } 1011 1012 void 1013 moea64_deactivate(mmu_t mmu, struct thread *td) 1014 { 1015 pmap_t pm; 1016 1017 pm = &td->td_proc->p_vmspace->vm_pmap; 1018 CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); 1019 #ifdef __powerpc64__ 1020 PCPU_SET(userslb, NULL); 1021 #else 1022 PCPU_SET(curpmap, NULL); 1023 #endif 1024 } 1025 1026 void 1027 moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) 1028 { 1029 struct pvo_entry *pvo; 1030 uintptr_t pt; 1031 uint64_t vsid; 1032 int i, ptegidx; 1033 1034 PMAP_LOCK(pm); 1035 pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); 1036 1037 if (pvo != NULL) { 1038 LOCK_TABLE(); 1039 pt = MOEA64_PVO_TO_PTE(mmu, pvo); 1040 1041 if (wired) { 1042 if ((pvo->pvo_vaddr & PVO_WIRED) == 0) 1043 pm->pm_stats.wired_count++; 1044 pvo->pvo_vaddr |= PVO_WIRED; 1045 pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; 1046 } else { 1047 if ((pvo->pvo_vaddr & PVO_WIRED) != 0) 1048 pm->pm_stats.wired_count--; 1049 pvo->pvo_vaddr &= ~PVO_WIRED; 1050 pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; 1051 } 1052 1053 if (pt != -1) { 1054 /* Update wiring flag in page table. */ 1055 MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, 1056 pvo->pvo_vpn); 1057 } else if (wired) { 1058 /* 1059 * If we are wiring the page, and it wasn't in the 1060 * page table before, add it. 1061 */ 1062 vsid = PVO_VSID(pvo); 1063 ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), 1064 pvo->pvo_vaddr & PVO_LARGE); 1065 1066 i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); 1067 1068 if (i >= 0) { 1069 PVO_PTEGIDX_CLR(pvo); 1070 PVO_PTEGIDX_SET(pvo, i); 1071 } 1072 } 1073 1074 UNLOCK_TABLE(); 1075 } 1076 PMAP_UNLOCK(pm); 1077 } 1078 1079 /* 1080 * This goes through and sets the physical address of our 1081 * special scratch PTE to the PA we want to zero or copy. 
Because 1082 * of locking issues (this can get called in pvo_enter() by 1083 * the UMA allocator), we can't use most other utility functions here 1084 */ 1085 1086 static __inline 1087 void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_offset_t pa) { 1088 1089 KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); 1090 mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); 1091 1092 moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo &= 1093 ~(LPTE_WIMG | LPTE_RPGN); 1094 moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo |= 1095 moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; 1096 MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[which], 1097 &moea64_scratchpage_pvo[which]->pvo_pte.lpte, 1098 moea64_scratchpage_pvo[which]->pvo_vpn); 1099 isync(); 1100 } 1101 1102 void 1103 moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) 1104 { 1105 vm_offset_t dst; 1106 vm_offset_t src; 1107 1108 dst = VM_PAGE_TO_PHYS(mdst); 1109 src = VM_PAGE_TO_PHYS(msrc); 1110 1111 if (hw_direct_map) { 1112 kcopy((void *)src, (void *)dst, PAGE_SIZE); 1113 } else { 1114 mtx_lock(&moea64_scratchpage_mtx); 1115 1116 moea64_set_scratchpage_pa(mmu, 0, src); 1117 moea64_set_scratchpage_pa(mmu, 1, dst); 1118 1119 kcopy((void *)moea64_scratchpage_va[0], 1120 (void *)moea64_scratchpage_va[1], PAGE_SIZE); 1121 1122 mtx_unlock(&moea64_scratchpage_mtx); 1123 } 1124 } 1125 1126 void 1127 moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) 1128 { 1129 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1130 1131 if (size + off > PAGE_SIZE) 1132 panic("moea64_zero_page: size + off > PAGE_SIZE"); 1133 1134 if (hw_direct_map) { 1135 bzero((caddr_t)pa + off, size); 1136 } else { 1137 mtx_lock(&moea64_scratchpage_mtx); 1138 moea64_set_scratchpage_pa(mmu, 0, pa); 1139 bzero((caddr_t)moea64_scratchpage_va[0] + off, size); 1140 mtx_unlock(&moea64_scratchpage_mtx); 1141 } 1142 } 1143 1144 /* 1145 * Zero a page of physical memory by temporarily mapping it 1146 */ 1147 void 1148 moea64_zero_page(mmu_t mmu, vm_page_t m) 1149 { 1150 vm_offset_t pa = VM_PAGE_TO_PHYS(m); 1151 vm_offset_t va, off; 1152 1153 if (!hw_direct_map) { 1154 mtx_lock(&moea64_scratchpage_mtx); 1155 1156 moea64_set_scratchpage_pa(mmu, 0, pa); 1157 va = moea64_scratchpage_va[0]; 1158 } else { 1159 va = pa; 1160 } 1161 1162 for (off = 0; off < PAGE_SIZE; off += cacheline_size) 1163 __asm __volatile("dcbz 0,%0" :: "r"(va + off)); 1164 1165 if (!hw_direct_map) 1166 mtx_unlock(&moea64_scratchpage_mtx); 1167 } 1168 1169 void 1170 moea64_zero_page_idle(mmu_t mmu, vm_page_t m) 1171 { 1172 1173 moea64_zero_page(mmu, m); 1174 } 1175 1176 /* 1177 * Map the given physical page at the specified virtual address in the 1178 * target pmap with the protection requested. If specified the page 1179 * will be wired down. 1180 */ 1181 void 1182 moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1183 vm_prot_t prot, boolean_t wired) 1184 { 1185 1186 vm_page_lock_queues(); 1187 PMAP_LOCK(pmap); 1188 moea64_enter_locked(mmu, pmap, va, m, prot, wired); 1189 vm_page_unlock_queues(); 1190 PMAP_UNLOCK(pmap); 1191 } 1192 1193 /* 1194 * Map the given physical page at the specified virtual address in the 1195 * target pmap with the protection requested. If specified the page 1196 * will be wired down. 1197 * 1198 * The page queues and pmap must be locked. 
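 *
 * The requested protection is translated into PTE bits here: writable
 * mappings get LPTE_BW, read-only mappings LPTE_BR, and mappings without
 * execute permission are marked LPTE_NOEXEC.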
1199 */ 1200 1201 static void 1202 moea64_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, 1203 vm_prot_t prot, boolean_t wired) 1204 { 1205 struct pvo_head *pvo_head; 1206 uma_zone_t zone; 1207 vm_page_t pg; 1208 uint64_t pte_lo; 1209 u_int pvo_flags; 1210 int error; 1211 1212 if (!moea64_initialized) { 1213 pvo_head = &moea64_pvo_kunmanaged; 1214 pg = NULL; 1215 zone = moea64_upvo_zone; 1216 pvo_flags = 0; 1217 } else { 1218 pvo_head = vm_page_to_pvoh(m); 1219 pg = m; 1220 zone = moea64_mpvo_zone; 1221 pvo_flags = PVO_MANAGED; 1222 } 1223 1224 if (pmap_bootstrapped) 1225 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1226 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1227 KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || 1228 VM_OBJECT_LOCKED(m->object), 1229 ("moea64_enter_locked: page %p is not busy", m)); 1230 1231 /* XXX change the pvo head for fake pages */ 1232 if ((m->oflags & VPO_UNMANAGED) != 0) { 1233 pvo_flags &= ~PVO_MANAGED; 1234 pvo_head = &moea64_pvo_kunmanaged; 1235 zone = moea64_upvo_zone; 1236 } 1237 1238 pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); 1239 1240 if (prot & VM_PROT_WRITE) { 1241 pte_lo |= LPTE_BW; 1242 if (pmap_bootstrapped && 1243 (m->oflags & VPO_UNMANAGED) == 0) 1244 vm_page_aflag_set(m, PGA_WRITEABLE); 1245 } else 1246 pte_lo |= LPTE_BR; 1247 1248 if ((prot & VM_PROT_EXECUTE) == 0) 1249 pte_lo |= LPTE_NOEXEC; 1250 1251 if (wired) 1252 pvo_flags |= PVO_WIRED; 1253 1254 error = moea64_pvo_enter(mmu, pmap, zone, pvo_head, va, 1255 VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags); 1256 1257 /* 1258 * Flush the page from the instruction cache if this page is 1259 * mapped executable and cacheable. 1260 */ 1261 if ((pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) 1262 moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); 1263 } 1264 1265 static void 1266 moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t pa, 1267 vm_size_t sz) 1268 { 1269 1270 /* 1271 * This is much trickier than on older systems because 1272 * we can't sync the icache on physical addresses directly 1273 * without a direct map. Instead we check a couple of cases 1274 * where the memory is already mapped in and, failing that, 1275 * use the same trick we use for page zeroing to create 1276 * a temporary mapping for this physical address. 1277 */ 1278 1279 if (!pmap_bootstrapped) { 1280 /* 1281 * If PMAP is not bootstrapped, we are likely to be 1282 * in real mode. 1283 */ 1284 __syncicache((void *)pa, sz); 1285 } else if (pmap == kernel_pmap) { 1286 __syncicache((void *)va, sz); 1287 } else if (hw_direct_map) { 1288 __syncicache((void *)pa, sz); 1289 } else { 1290 /* Use the scratch page to set up a temp mapping */ 1291 1292 mtx_lock(&moea64_scratchpage_mtx); 1293 1294 moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); 1295 __syncicache((void *)(moea64_scratchpage_va[1] + 1296 (va & ADDR_POFF)), sz); 1297 1298 mtx_unlock(&moea64_scratchpage_mtx); 1299 } 1300 } 1301 1302 /* 1303 * Maps a sequence of resident pages belonging to the same object. 1304 * The sequence begins with the given page m_start. This page is 1305 * mapped at the given virtual address start. Each subsequent page is 1306 * mapped at a virtual address that is offset from start by the same 1307 * amount as the page is offset from m_start within the object. The 1308 * last page in the sequence is the page with the largest offset from 1309 * m_start that can be mapped at a virtual address less than the given 1310 * virtual address end. 
Not every virtual page between start and end 1311 * is mapped; only those for which a resident page exists with the 1312 * corresponding offset from m_start are mapped. 1313 */ 1314 void 1315 moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, 1316 vm_page_t m_start, vm_prot_t prot) 1317 { 1318 vm_page_t m; 1319 vm_pindex_t diff, psize; 1320 1321 psize = atop(end - start); 1322 m = m_start; 1323 vm_page_lock_queues(); 1324 PMAP_LOCK(pm); 1325 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 1326 moea64_enter_locked(mmu, pm, start + ptoa(diff), m, prot & 1327 (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1328 m = TAILQ_NEXT(m, listq); 1329 } 1330 vm_page_unlock_queues(); 1331 PMAP_UNLOCK(pm); 1332 } 1333 1334 void 1335 moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, 1336 vm_prot_t prot) 1337 { 1338 1339 vm_page_lock_queues(); 1340 PMAP_LOCK(pm); 1341 moea64_enter_locked(mmu, pm, va, m, 1342 prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); 1343 vm_page_unlock_queues(); 1344 PMAP_UNLOCK(pm); 1345 } 1346 1347 vm_paddr_t 1348 moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) 1349 { 1350 struct pvo_entry *pvo; 1351 vm_paddr_t pa; 1352 1353 PMAP_LOCK(pm); 1354 pvo = moea64_pvo_find_va(pm, va); 1355 if (pvo == NULL) 1356 pa = 0; 1357 else 1358 pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | 1359 (va - PVO_VADDR(pvo)); 1360 PMAP_UNLOCK(pm); 1361 return (pa); 1362 } 1363 1364 /* 1365 * Atomically extract and hold the physical page with the given 1366 * pmap and virtual address pair if that mapping permits the given 1367 * protection. 1368 */ 1369 vm_page_t 1370 moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1371 { 1372 struct pvo_entry *pvo; 1373 vm_page_t m; 1374 vm_paddr_t pa; 1375 1376 m = NULL; 1377 pa = 0; 1378 PMAP_LOCK(pmap); 1379 retry: 1380 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); 1381 if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && 1382 ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW || 1383 (prot & VM_PROT_WRITE) == 0)) { 1384 if (vm_page_pa_tryrelock(pmap, 1385 pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa)) 1386 goto retry; 1387 m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); 1388 vm_page_hold(m); 1389 } 1390 PA_UNLOCK_COND(pa); 1391 PMAP_UNLOCK(pmap); 1392 return (m); 1393 } 1394 1395 static mmu_t installed_mmu; 1396 1397 static void * 1398 moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 1399 { 1400 /* 1401 * This entire routine is a horrible hack to avoid bothering kmem 1402 * for new KVA addresses. Because this can get called from inside 1403 * kmem allocation routines, calling kmem for a new address here 1404 * can lead to multiply locking non-recursive mutexes. 
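 *
 * Instead, take a wired page straight from vm_page_alloc() and map it 1:1
 * (VA == PA) with a bootstrap-style PVO, taking the kernel pmap lock only
 * if the caller does not already hold it.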
1405 */ 1406 vm_offset_t va; 1407 1408 vm_page_t m; 1409 int pflags, needed_lock; 1410 1411 *flags = UMA_SLAB_PRIV; 1412 needed_lock = !PMAP_LOCKED(kernel_pmap); 1413 1414 if (needed_lock) 1415 PMAP_LOCK(kernel_pmap); 1416 1417 if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) 1418 pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED; 1419 else 1420 pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED; 1421 if (wait & M_ZERO) 1422 pflags |= VM_ALLOC_ZERO; 1423 1424 for (;;) { 1425 m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); 1426 if (m == NULL) { 1427 if (wait & M_NOWAIT) 1428 return (NULL); 1429 VM_WAIT; 1430 } else 1431 break; 1432 } 1433 1434 va = VM_PAGE_TO_PHYS(m); 1435 1436 moea64_pvo_enter(installed_mmu, kernel_pmap, moea64_upvo_zone, 1437 &moea64_pvo_kunmanaged, va, VM_PAGE_TO_PHYS(m), LPTE_M, 1438 PVO_WIRED | PVO_BOOTSTRAP); 1439 1440 if (needed_lock) 1441 PMAP_UNLOCK(kernel_pmap); 1442 1443 if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) 1444 bzero((void *)va, PAGE_SIZE); 1445 1446 return (void *)va; 1447 } 1448 1449 extern int elf32_nxstack; 1450 1451 void 1452 moea64_init(mmu_t mmu) 1453 { 1454 1455 CTR0(KTR_PMAP, "moea64_init"); 1456 1457 moea64_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), 1458 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1459 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1460 moea64_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), 1461 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 1462 UMA_ZONE_VM | UMA_ZONE_NOFREE); 1463 1464 if (!hw_direct_map) { 1465 installed_mmu = mmu; 1466 uma_zone_set_allocf(moea64_upvo_zone,moea64_uma_page_alloc); 1467 uma_zone_set_allocf(moea64_mpvo_zone,moea64_uma_page_alloc); 1468 } 1469 1470 #ifdef COMPAT_FREEBSD32 1471 elf32_nxstack = 1; 1472 #endif 1473 1474 moea64_initialized = TRUE; 1475 } 1476 1477 boolean_t 1478 moea64_is_referenced(mmu_t mmu, vm_page_t m) 1479 { 1480 1481 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1482 ("moea64_is_referenced: page %p is not managed", m)); 1483 return (moea64_query_bit(mmu, m, PTE_REF)); 1484 } 1485 1486 boolean_t 1487 moea64_is_modified(mmu_t mmu, vm_page_t m) 1488 { 1489 1490 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1491 ("moea64_is_modified: page %p is not managed", m)); 1492 1493 /* 1494 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be 1495 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 1496 * is clear, no PTEs can have LPTE_CHG set. 
1497 */ 1498 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1499 if ((m->oflags & VPO_BUSY) == 0 && 1500 (m->aflags & PGA_WRITEABLE) == 0) 1501 return (FALSE); 1502 return (moea64_query_bit(mmu, m, LPTE_CHG)); 1503 } 1504 1505 boolean_t 1506 moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) 1507 { 1508 struct pvo_entry *pvo; 1509 boolean_t rv; 1510 1511 PMAP_LOCK(pmap); 1512 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); 1513 rv = pvo == NULL || (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0; 1514 PMAP_UNLOCK(pmap); 1515 return (rv); 1516 } 1517 1518 void 1519 moea64_clear_reference(mmu_t mmu, vm_page_t m) 1520 { 1521 1522 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1523 ("moea64_clear_reference: page %p is not managed", m)); 1524 moea64_clear_bit(mmu, m, LPTE_REF); 1525 } 1526 1527 void 1528 moea64_clear_modify(mmu_t mmu, vm_page_t m) 1529 { 1530 1531 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1532 ("moea64_clear_modify: page %p is not managed", m)); 1533 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1534 KASSERT((m->oflags & VPO_BUSY) == 0, 1535 ("moea64_clear_modify: page %p is busy", m)); 1536 1537 /* 1538 * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG 1539 * set. If the object containing the page is locked and the page is 1540 * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. 1541 */ 1542 if ((m->aflags & PGA_WRITEABLE) == 0) 1543 return; 1544 moea64_clear_bit(mmu, m, LPTE_CHG); 1545 } 1546 1547 /* 1548 * Clear the write and modified bits in each of the given page's mappings. 1549 */ 1550 void 1551 moea64_remove_write(mmu_t mmu, vm_page_t m) 1552 { 1553 struct pvo_entry *pvo; 1554 uintptr_t pt; 1555 pmap_t pmap; 1556 uint64_t lo; 1557 1558 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1559 ("moea64_remove_write: page %p is not managed", m)); 1560 1561 /* 1562 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by 1563 * another thread while the object is locked. Thus, if PGA_WRITEABLE 1564 * is clear, no page table entries need updating. 1565 */ 1566 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 1567 if ((m->oflags & VPO_BUSY) == 0 && 1568 (m->aflags & PGA_WRITEABLE) == 0) 1569 return; 1570 vm_page_lock_queues(); 1571 lo = moea64_attr_fetch(m); 1572 powerpc_sync(); 1573 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { 1574 pmap = pvo->pvo_pmap; 1575 PMAP_LOCK(pmap); 1576 LOCK_TABLE(); 1577 if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) { 1578 pt = MOEA64_PVO_TO_PTE(mmu, pvo); 1579 pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; 1580 pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; 1581 if (pt != -1) { 1582 MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); 1583 lo |= pvo->pvo_pte.lpte.pte_lo; 1584 pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG; 1585 MOEA64_PTE_CHANGE(mmu, pt, 1586 &pvo->pvo_pte.lpte, pvo->pvo_vpn); 1587 if (pvo->pvo_pmap == kernel_pmap) 1588 isync(); 1589 } 1590 } 1591 UNLOCK_TABLE(); 1592 PMAP_UNLOCK(pmap); 1593 } 1594 if ((lo & LPTE_CHG) != 0) { 1595 moea64_attr_clear(m, LPTE_CHG); 1596 vm_page_dirty(m); 1597 } 1598 vm_page_aflag_clear(m, PGA_WRITEABLE); 1599 vm_page_unlock_queues(); 1600 } 1601 1602 /* 1603 * moea64_ts_referenced: 1604 * 1605 * Return a count of reference bits for a page, clearing those bits. 1606 * It is not necessary for every reference bit to be cleared, but it 1607 * is necessary that 0 only be returned when there are truly no 1608 * reference bits set. 
1609 * 1610 * XXX: The exact number of bits to check and clear is a matter that 1611 * should be tested and standardized at some point in the future for 1612 * optimal aging of shared pages. 1613 */ 1614 boolean_t 1615 moea64_ts_referenced(mmu_t mmu, vm_page_t m) 1616 { 1617 1618 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1619 ("moea64_ts_referenced: page %p is not managed", m)); 1620 return (moea64_clear_bit(mmu, m, LPTE_REF)); 1621 } 1622 1623 /* 1624 * Modify the WIMG settings of all mappings for a page. 1625 */ 1626 void 1627 moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) 1628 { 1629 struct pvo_entry *pvo; 1630 struct pvo_head *pvo_head; 1631 uintptr_t pt; 1632 pmap_t pmap; 1633 uint64_t lo; 1634 1635 if ((m->oflags & VPO_UNMANAGED) != 0) { 1636 m->md.mdpg_cache_attrs = ma; 1637 return; 1638 } 1639 1640 vm_page_lock_queues(); 1641 pvo_head = vm_page_to_pvoh(m); 1642 lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); 1643 LIST_FOREACH(pvo, pvo_head, pvo_vlink) { 1644 pmap = pvo->pvo_pmap; 1645 PMAP_LOCK(pmap); 1646 LOCK_TABLE(); 1647 pt = MOEA64_PVO_TO_PTE(mmu, pvo); 1648 pvo->pvo_pte.lpte.pte_lo &= ~LPTE_WIMG; 1649 pvo->pvo_pte.lpte.pte_lo |= lo; 1650 if (pt != -1) { 1651 MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, 1652 pvo->pvo_vpn); 1653 if (pvo->pvo_pmap == kernel_pmap) 1654 isync(); 1655 } 1656 UNLOCK_TABLE(); 1657 PMAP_UNLOCK(pmap); 1658 } 1659 m->md.mdpg_cache_attrs = ma; 1660 vm_page_unlock_queues(); 1661 } 1662 1663 /* 1664 * Map a wired page into kernel virtual address space. 1665 */ 1666 void 1667 moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) 1668 { 1669 uint64_t pte_lo; 1670 int error; 1671 1672 pte_lo = moea64_calc_wimg(pa, ma); 1673 1674 PMAP_LOCK(kernel_pmap); 1675 error = moea64_pvo_enter(mmu, kernel_pmap, moea64_upvo_zone, 1676 &moea64_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED); 1677 1678 if (error != 0 && error != ENOENT) 1679 panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, 1680 pa, error); 1681 1682 /* 1683 * Flush the memory from the instruction cache. 1684 */ 1685 if ((pte_lo & (LPTE_I | LPTE_G)) == 0) 1686 __syncicache((void *)va, PAGE_SIZE); 1687 PMAP_UNLOCK(kernel_pmap); 1688 } 1689 1690 void 1691 moea64_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa) 1692 { 1693 1694 moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); 1695 } 1696 1697 /* 1698 * Extract the physical page address associated with the given kernel virtual 1699 * address. 1700 */ 1701 vm_offset_t 1702 moea64_kextract(mmu_t mmu, vm_offset_t va) 1703 { 1704 struct pvo_entry *pvo; 1705 vm_paddr_t pa; 1706 1707 /* 1708 * Shortcut the direct-mapped case when applicable. We never put 1709 * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. 1710 */ 1711 if (va < VM_MIN_KERNEL_ADDRESS) 1712 return (va); 1713 1714 PMAP_LOCK(kernel_pmap); 1715 pvo = moea64_pvo_find_va(kernel_pmap, va); 1716 KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, 1717 va)); 1718 pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo)); 1719 PMAP_UNLOCK(kernel_pmap); 1720 return (pa); 1721 } 1722 1723 /* 1724 * Remove a wired page from kernel virtual address space. 1725 */ 1726 void 1727 moea64_kremove(mmu_t mmu, vm_offset_t va) 1728 { 1729 moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); 1730 } 1731 1732 /* 1733 * Map a range of physical addresses into kernel virtual address space. 1734 * 1735 * The value passed in *virt is a suggested virtual address for the mapping. 
 * Architectures which can support a direct-mapped physical to virtual region
 * can return the appropriate address within that region, leaving '*virt'
 * unchanged. We cannot and therefore do not; *virt is updated with the
 * first usable address after the mapped region.
 */
vm_offset_t
moea64_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start,
    vm_offset_t pa_end, int prot)
{
        vm_offset_t sva, va;

        sva = *virt;
        va = sva;
        for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
                moea64_kenter(mmu, va, pa_start);
        *virt = va;

        return (sva);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page. This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
{
        int loops;
        struct pvo_entry *pvo;
        boolean_t rv;

        KASSERT((m->oflags & VPO_UNMANAGED) == 0,
            ("moea64_page_exists_quick: page %p is not managed", m));
        loops = 0;
        rv = FALSE;
        vm_page_lock_queues();
        LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
                if (pvo->pvo_pmap == pmap) {
                        rv = TRUE;
                        break;
                }
                if (++loops >= 16)
                        break;
        }
        vm_page_unlock_queues();
        return (rv);
}

/*
 * Return the number of managed mappings to the given physical page
 * that are wired.
 */
int
moea64_page_wired_mappings(mmu_t mmu, vm_page_t m)
{
        struct pvo_entry *pvo;
        int count;

        count = 0;
        if ((m->oflags & VPO_UNMANAGED) != 0)
                return (count);
        vm_page_lock_queues();
        LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
                if ((pvo->pvo_vaddr & PVO_WIRED) != 0)
                        count++;
        vm_page_unlock_queues();
        return (count);
}

static uintptr_t        moea64_vsidcontext;

uintptr_t
moea64_get_unique_vsid(void) {
        u_int entropy;
        register_t hash;
        uint32_t mask;
        int i;

        entropy = 0;
        __asm __volatile("mftb %0" : "=r"(entropy));

        mtx_lock(&moea64_slb_mutex);
        for (i = 0; i < NVSIDS; i += VSID_NBPW) {
                u_int n;

                /*
                 * Create a new value by multiplying by a prime and adding in
                 * entropy from the timebase register. This is to make the
                 * VSID more random so that the PT hash function collides
                 * less often. (Note that the prime causes gcc to do shifts
                 * instead of a multiply.)
                 */
                moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
                hash = moea64_vsidcontext & (NVSIDS - 1);
                if (hash == 0)          /* 0 is special, avoid it */
                        continue;
                n = hash >> 5;
                mask = 1 << (hash & (VSID_NBPW - 1));
                hash = (moea64_vsidcontext & VSID_HASHMASK);
                if (moea64_vsid_bitmap[n] & mask) {     /* collision? */
                        /* anything free in this bucket?
*/ 1840 if (moea64_vsid_bitmap[n] == 0xffffffff) { 1841 entropy = (moea64_vsidcontext >> 20); 1842 continue; 1843 } 1844 i = ffs(~moea64_vsid_bitmap[n]) - 1; 1845 mask = 1 << i; 1846 hash &= VSID_HASHMASK & ~(VSID_NBPW - 1); 1847 hash |= i; 1848 } 1849 KASSERT(!(moea64_vsid_bitmap[n] & mask), 1850 ("Allocating in-use VSID %#zx\n", hash)); 1851 moea64_vsid_bitmap[n] |= mask; 1852 mtx_unlock(&moea64_slb_mutex); 1853 return (hash); 1854 } 1855 1856 mtx_unlock(&moea64_slb_mutex); 1857 panic("%s: out of segments",__func__); 1858 } 1859 1860 #ifdef __powerpc64__ 1861 void 1862 moea64_pinit(mmu_t mmu, pmap_t pmap) 1863 { 1864 PMAP_LOCK_INIT(pmap); 1865 LIST_INIT(&pmap->pmap_pvo); 1866 1867 pmap->pm_slb_tree_root = slb_alloc_tree(); 1868 pmap->pm_slb = slb_alloc_user_cache(); 1869 pmap->pm_slb_len = 0; 1870 } 1871 #else 1872 void 1873 moea64_pinit(mmu_t mmu, pmap_t pmap) 1874 { 1875 int i; 1876 uint32_t hash; 1877 1878 PMAP_LOCK_INIT(pmap); 1879 LIST_INIT(&pmap->pmap_pvo); 1880 1881 if (pmap_bootstrapped) 1882 pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, 1883 (vm_offset_t)pmap); 1884 else 1885 pmap->pmap_phys = pmap; 1886 1887 /* 1888 * Allocate some segment registers for this pmap. 1889 */ 1890 hash = moea64_get_unique_vsid(); 1891 1892 for (i = 0; i < 16; i++) 1893 pmap->pm_sr[i] = VSID_MAKE(i, hash); 1894 1895 KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); 1896 } 1897 #endif 1898 1899 /* 1900 * Initialize the pmap associated with process 0. 1901 */ 1902 void 1903 moea64_pinit0(mmu_t mmu, pmap_t pm) 1904 { 1905 moea64_pinit(mmu, pm); 1906 bzero(&pm->pm_stats, sizeof(pm->pm_stats)); 1907 } 1908 1909 /* 1910 * Set the physical protection on the specified range of this map as requested. 1911 */ 1912 void 1913 moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, 1914 vm_prot_t prot) 1915 { 1916 struct pvo_entry *pvo; 1917 uintptr_t pt; 1918 1919 CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva, 1920 eva, prot); 1921 1922 1923 KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, 1924 ("moea64_protect: non current pmap")); 1925 1926 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1927 moea64_remove(mmu, pm, sva, eva); 1928 return; 1929 } 1930 1931 vm_page_lock_queues(); 1932 PMAP_LOCK(pm); 1933 for (; sva < eva; sva += PAGE_SIZE) { 1934 pvo = moea64_pvo_find_va(pm, sva); 1935 if (pvo == NULL) 1936 continue; 1937 1938 /* 1939 * Grab the PTE pointer before we diddle with the cached PTE 1940 * copy. 1941 */ 1942 LOCK_TABLE(); 1943 pt = MOEA64_PVO_TO_PTE(mmu, pvo); 1944 1945 /* 1946 * Change the protection of the page. 1947 */ 1948 pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; 1949 pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; 1950 pvo->pvo_pte.lpte.pte_lo &= ~LPTE_NOEXEC; 1951 if ((prot & VM_PROT_EXECUTE) == 0) 1952 pvo->pvo_pte.lpte.pte_lo |= LPTE_NOEXEC; 1953 1954 /* 1955 * If the PVO is in the page table, update that pte as well. 1956 */ 1957 if (pt != -1) { 1958 MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, 1959 pvo->pvo_vpn); 1960 if ((pvo->pvo_pte.lpte.pte_lo & 1961 (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { 1962 moea64_syncicache(mmu, pm, sva, 1963 pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, 1964 PAGE_SIZE); 1965 } 1966 } 1967 UNLOCK_TABLE(); 1968 } 1969 vm_page_unlock_queues(); 1970 PMAP_UNLOCK(pm); 1971 } 1972 1973 /* 1974 * Map a list of wired pages into kernel virtual address space. This is 1975 * intended for temporary mappings which do not need page modification or 1976 * references recorded. Existing mappings in the region are overwritten. 
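 *
 * Each page is entered with moea64_kenter(), so the mappings are wired
 * and use the default memory attributes.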

void
moea64_release_vsid(uint64_t vsid)
{
        int idx, mask;

        mtx_lock(&moea64_slb_mutex);
        idx = vsid & (NVSIDS-1);
        mask = 1 << (idx % VSID_NBPW);
        idx /= VSID_NBPW;
        KASSERT(moea64_vsid_bitmap[idx] & mask,
            ("Freeing unallocated VSID %#jx", vsid));
        moea64_vsid_bitmap[idx] &= ~mask;
        mtx_unlock(&moea64_slb_mutex);
}

void
moea64_release(mmu_t mmu, pmap_t pmap)
{

        /*
         * Free segment registers' VSIDs
         */
#ifdef __powerpc64__
        slb_free_tree(pmap);
        slb_free_user_cache(pmap->pm_slb);
#else
        KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));

        moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
#endif

        PMAP_LOCK_DESTROY(pmap);
}

/*
 * Remove the given range of addresses from the specified map.
 */
void
moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{
        struct pvo_entry *pvo, *tpvo;

        vm_page_lock_queues();
        PMAP_LOCK(pm);

        /*
         * For small ranges it is cheaper to look up each page directly;
         * for large ranges, walk the pmap's PVO list and filter by address.
         */
        if ((eva - sva)/PAGE_SIZE < 10) {
                for (; sva < eva; sva += PAGE_SIZE) {
                        pvo = moea64_pvo_find_va(pm, sva);
                        if (pvo != NULL)
                                moea64_pvo_remove(mmu, pvo);
                }
        } else {
                LIST_FOREACH_SAFE(pvo, &pm->pmap_pvo, pvo_plink, tpvo) {
                        if (PVO_VADDR(pvo) < sva || PVO_VADDR(pvo) >= eva)
                                continue;
                        moea64_pvo_remove(mmu, pvo);
                }
        }
        vm_page_unlock_queues();
        PMAP_UNLOCK(pm);
}

/*
 * Remove physical page from all pmaps in which it resides.
 * moea64_pvo_remove() will reflect changes in the PTEs back to the vm_page.
 */
void
moea64_remove_all(mmu_t mmu, vm_page_t m)
{
        struct pvo_head *pvo_head;
        struct pvo_entry *pvo, *next_pvo;
        pmap_t pmap;

        vm_page_lock_queues();
        pvo_head = vm_page_to_pvoh(m);
        for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) {
                next_pvo = LIST_NEXT(pvo, pvo_vlink);

                pmap = pvo->pvo_pmap;
                PMAP_LOCK(pmap);
                moea64_pvo_remove(mmu, pvo);
                PMAP_UNLOCK(pmap);
        }
        if ((m->aflags & PGA_WRITEABLE) && moea64_is_modified(mmu, m)) {
                moea64_attr_clear(m, LPTE_CHG);
                vm_page_dirty(m);
        }
        vm_page_aflag_clear(m, PGA_WRITEABLE);
        vm_page_unlock_queues();
}

/*
 * Allocate a physical page of memory directly from the phys_avail map.
 * Can only be called from moea64_bootstrap before avail start and end are
 * calculated.
 */
vm_offset_t
moea64_bootstrap_alloc(vm_size_t size, u_int align)
{
        vm_offset_t s, e;
        int i, j;

        size = round_page(size);
        for (i = 0; phys_avail[i + 1] != 0; i += 2) {
                if (align != 0)
                        s = (phys_avail[i] + align - 1) & ~(align - 1);
                else
                        s = phys_avail[i];
                e = s + size;

                if (s < phys_avail[i] || e > phys_avail[i + 1])
                        continue;

                if (s + size > platform_real_maxaddr())
                        continue;

                if (s == phys_avail[i]) {
                        phys_avail[i] += size;
                } else if (e == phys_avail[i + 1]) {
                        phys_avail[i + 1] -= size;
                } else {
                        for (j = phys_avail_count * 2; j > i; j -= 2) {
                                phys_avail[j] = phys_avail[j - 2];
                                phys_avail[j + 1] = phys_avail[j - 1];
                        }

                        phys_avail[i + 3] = phys_avail[i + 1];
                        phys_avail[i + 1] = s;
                        phys_avail[i + 2] = e;
                        phys_avail_count++;
                }

                return (s);
        }
        panic("moea64_bootstrap_alloc: could not allocate memory");
}
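
/*
 * Illustrative sketch (not compiled): how the three cases in
 * moea64_bootstrap_alloc() reshape phys_avail, which is a zero-terminated
 * array of (start, end) pairs.  The addresses are invented for the example.
 */
#if 0
        /* Before: one free region, 0x100000-0x900000. */
        vm_offset_t example_avail[] = { 0x100000, 0x900000, 0, 0 };

        /* Allocating 0x10000 at the start advances phys_avail[i]:          */
        /*     { 0x110000, 0x900000, 0, 0 }                                  */
        /* Allocating at the end shrinks phys_avail[i + 1]:                  */
        /*     { 0x100000, 0x8f0000, 0, 0 }                                  */
        /* Carving 0x400000-0x410000 out of the middle splits the pair and   */
        /* shifts the rest of the array up by one entry:                     */
        /*     { 0x100000, 0x400000, 0x410000, 0x900000, 0, ... }            */
#endif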

static int
moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t zone,
    struct pvo_head *pvo_head, vm_offset_t va, vm_offset_t pa,
    uint64_t pte_lo, int flags)
{
        struct pvo_entry *pvo;
        uint64_t vsid;
        int first;
        u_int ptegidx;
        int i;
        int bootstrap;

        /*
         * One nasty thing that can happen here is that the UMA calls to
         * allocate new PVOs need to map more memory, which calls pvo_enter(),
         * which calls UMA...
         *
         * We break the loop by detecting recursion and allocating out of
         * the bootstrap pool.
         */

        first = 0;
        bootstrap = (flags & PVO_BOOTSTRAP);

        if (!moea64_initialized)
                bootstrap = 1;

        /*
         * Compute the PTE Group index.
         */
        va &= ~ADDR_POFF;
        vsid = va_to_vsid(pm, va);
        ptegidx = va_to_pteg(vsid, va, flags & PVO_LARGE);

        /*
         * Remove any existing mapping for this page.  Reuse the pvo entry if
         * there is a mapping.
         */
        LOCK_TABLE();

        moea64_pvo_enter_calls++;

        LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
                if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) {
                        if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa &&
                            (pvo->pvo_pte.lpte.pte_lo & (LPTE_NOEXEC | LPTE_PP))
                            == (pte_lo & (LPTE_NOEXEC | LPTE_PP))) {
                                if (!(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID)) {
                                        /* Re-insert if spilled */
                                        i = MOEA64_PTE_INSERT(mmu, ptegidx,
                                            &pvo->pvo_pte.lpte);
                                        if (i >= 0)
                                                PVO_PTEGIDX_SET(pvo, i);
                                        moea64_pte_overflow--;
                                }
                                UNLOCK_TABLE();
                                return (0);
                        }
                        moea64_pvo_remove(mmu, pvo);
                        break;
                }
        }

        /*
         * If we aren't overwriting a mapping, try to allocate.
         */
        if (bootstrap) {
                if (moea64_bpvo_pool_index >= BPVO_POOL_SIZE) {
                        panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd",
                            moea64_bpvo_pool_index, BPVO_POOL_SIZE,
                            BPVO_POOL_SIZE * sizeof(struct pvo_entry));
                }
                pvo = &moea64_bpvo_pool[moea64_bpvo_pool_index];
                moea64_bpvo_pool_index++;
                bootstrap = 1;
        } else {
                /*
                 * Note: drop the table lock around the UMA allocation in
                 * case the UMA allocator needs to manipulate the page
                 * table.  The mapping we are working with is already
                 * protected by the PMAP lock.
                 */
                UNLOCK_TABLE();
                pvo = uma_zalloc(zone, M_NOWAIT);
                LOCK_TABLE();
        }

        if (pvo == NULL) {
                UNLOCK_TABLE();
                return (ENOMEM);
        }

        moea64_pvo_entries++;
        pvo->pvo_vaddr = va;
        pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
            | (vsid << 16);
        pvo->pvo_pmap = pm;
        LIST_INSERT_HEAD(&moea64_pvo_table[ptegidx], pvo, pvo_olink);
        pvo->pvo_vaddr &= ~ADDR_POFF;

        if (flags & PVO_WIRED)
                pvo->pvo_vaddr |= PVO_WIRED;
        if (pvo_head != &moea64_pvo_kunmanaged)
                pvo->pvo_vaddr |= PVO_MANAGED;
        if (bootstrap)
                pvo->pvo_vaddr |= PVO_BOOTSTRAP;
        if (flags & PVO_LARGE)
                pvo->pvo_vaddr |= PVO_LARGE;

        moea64_pte_create(&pvo->pvo_pte.lpte, vsid, va,
            (uint64_t)(pa) | pte_lo, flags);

        /*
         * Add to pmap list
         */
        LIST_INSERT_HEAD(&pm->pmap_pvo, pvo, pvo_plink);

        /*
         * Remember if the list was empty and therefore this will be the
         * first item.
         */
        if (LIST_FIRST(pvo_head) == NULL)
                first = 1;
        LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);

        if (pvo->pvo_vaddr & PVO_WIRED) {
                pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED;
                pm->pm_stats.wired_count++;
        }
        pm->pm_stats.resident_count++;

        /*
         * We hope this succeeds but it isn't required.
         */
        i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte);
        if (i >= 0) {
                PVO_PTEGIDX_SET(pvo, i);
        } else {
                panic("moea64_pvo_enter: overflow");
                moea64_pte_overflow++;
        }

        if (pm == kernel_pmap)
                isync();

        UNLOCK_TABLE();

#ifdef __powerpc64__
        /*
         * Make sure all our bootstrap mappings are in the SLB as soon
         * as virtual memory is switched on.
         */
        if (!pmap_bootstrapped)
                moea64_bootstrap_slb_prefault(va, flags & PVO_LARGE);
#endif

        return (first ? ENOENT : 0);
}
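
/*
 * Illustrative sketch (not compiled): the return-value convention of
 * moea64_pvo_enter() as seen by a hypothetical caller.  This is a fragment;
 * the variables are assumed to match the function's parameters above.
 */
#if 0
        error = moea64_pvo_enter(mmu, pm, zone, pvo_head, va, pa,
            pte_lo, flags);
        if (error == ENOMEM) {
                /* No PVO could be allocated; caller may retry or fail. */
        } else if (error == ENOENT) {
                /* Success, and 'pa' had no other mappings before this one. */
        } else {
                /* Success (0); other mappings of 'pa' already existed. */
        }
#endif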

static void
moea64_pvo_remove(mmu_t mmu, struct pvo_entry *pvo)
{
        uintptr_t pt;

        /*
         * If there is an active pte entry, we need to deactivate it (and
         * save the ref & chg bits).
         */
        LOCK_TABLE();
        pt = MOEA64_PVO_TO_PTE(mmu, pvo);
        if (pt != -1) {
                MOEA64_PTE_UNSET(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn);
                PVO_PTEGIDX_CLR(pvo);
        } else {
                moea64_pte_overflow--;
        }

        /*
         * Update our statistics.
         */
        pvo->pvo_pmap->pm_stats.resident_count--;
        if (pvo->pvo_vaddr & PVO_WIRED)
                pvo->pvo_pmap->pm_stats.wired_count--;

        /*
         * Save the REF/CHG bits into their cache if the page is managed.
         */
        if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED) {
                struct vm_page *pg;

                pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN);
                if (pg != NULL) {
                        moea64_attr_save(pg, pvo->pvo_pte.lpte.pte_lo &
                            (LPTE_REF | LPTE_CHG));
                }
        }

        /*
         * Remove this PVO from the PV and pmap lists.
         */
        LIST_REMOVE(pvo, pvo_vlink);
        LIST_REMOVE(pvo, pvo_plink);

        /*
         * Remove this from the overflow list and return it to the pool
         * if we aren't going to reuse it.
         */
        LIST_REMOVE(pvo, pvo_olink);

        moea64_pvo_entries--;
        moea64_pvo_remove_calls++;

        UNLOCK_TABLE();

        if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
                uma_zfree((pvo->pvo_vaddr & PVO_MANAGED) ? moea64_mpvo_zone :
                    moea64_upvo_zone, pvo);
}

static struct pvo_entry *
moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
{
        struct pvo_entry *pvo;
        int ptegidx;
        uint64_t vsid;
#ifdef __powerpc64__
        uint64_t slbv;

        if (pm == kernel_pmap) {
                slbv = kernel_va_to_slbv(va);
        } else {
                struct slb *slb;
                slb = user_va_to_slb_entry(pm, va);
                /* The page is not mapped if the segment isn't */
                if (slb == NULL)
                        return NULL;
                slbv = slb->slbv;
        }

        vsid = (slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
        if (slbv & SLBV_L)
                va &= ~moea64_large_page_mask;
        else
                va &= ~ADDR_POFF;
        ptegidx = va_to_pteg(vsid, va, slbv & SLBV_L);
#else
        va &= ~ADDR_POFF;
        vsid = va_to_vsid(pm, va);
        ptegidx = va_to_pteg(vsid, va, 0);
#endif

        LOCK_TABLE();
        LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) {
                if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va)
                        break;
        }
        UNLOCK_TABLE();

        return (pvo);
}

static boolean_t
moea64_query_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit)
{
        struct pvo_entry *pvo;
        uintptr_t pt;

        if (moea64_attr_fetch(m) & ptebit)
                return (TRUE);

        vm_page_lock_queues();

        LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {

                /*
                 * See if we saved the bit off.  If so, cache it and return
                 * success.
                 */
                if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
                        moea64_attr_save(m, ptebit);
                        vm_page_unlock_queues();
                        return (TRUE);
                }
        }

        /*
         * No luck, now go through the hard part of looking at the PTEs
         * themselves.  Sync so that any pending REF/CHG bits are flushed to
         * the PTEs.
         */
        powerpc_sync();
        LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {

                /*
                 * See if this pvo has a valid PTE.  If so, fetch the
                 * REF/CHG bits from the valid PTE.  If the appropriate
                 * ptebit is set, cache it and return success.
                 */
                LOCK_TABLE();
                pt = MOEA64_PVO_TO_PTE(mmu, pvo);
                if (pt != -1) {
                        MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte);
                        if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
                                UNLOCK_TABLE();

                                moea64_attr_save(m, ptebit);
                                vm_page_unlock_queues();
                                return (TRUE);
                        }
                }
                UNLOCK_TABLE();
        }

        vm_page_unlock_queues();
        return (FALSE);
}

static u_int
moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit)
{
        u_int count;
        struct pvo_entry *pvo;
        uintptr_t pt;

        vm_page_lock_queues();

        /*
         * Clear the cached value.
         */
        moea64_attr_clear(m, ptebit);

        /*
         * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
         * we can reset the right ones).  Note that since the pvo entries and
         * list heads are accessed via BAT0 and are never placed in the page
         * table, we don't have to worry about further accesses setting the
         * REF/CHG bits.
         */
        powerpc_sync();

        /*
         * For each pvo entry, clear the pvo's ptebit.  If this pvo has a
         * valid pte clear the ptebit from the valid pte.
         */
        count = 0;
        LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {

                LOCK_TABLE();
                pt = MOEA64_PVO_TO_PTE(mmu, pvo);
                if (pt != -1) {
                        MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte);
                        if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
                                count++;
                                MOEA64_PTE_CLEAR(mmu, pt, &pvo->pvo_pte.lpte,
                                    pvo->pvo_vpn, ptebit);
                        }
                }
                pvo->pvo_pte.lpte.pte_lo &= ~ptebit;
                UNLOCK_TABLE();
        }

        vm_page_unlock_queues();
        return (count);
}
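
/*
 * Illustrative sketch (not compiled): how the REF/CHG helpers above are
 * typically consumed.  moea64_is_modified(), used by moea64_remove_all()
 * earlier in this file, is one such consumer.  The bodies below are a
 * minimal sketch assuming the conventional mapping of LPTE_CHG to "dirty"
 * and LPTE_REF to "referenced"; they are not the file's actual wrappers.
 */
#if 0
static boolean_t
example_is_modified(mmu_t mmu, vm_page_t m)
{

        /* TRUE if any mapping of 'm' has recorded a change (store). */
        return (moea64_query_bit(mmu, m, LPTE_CHG));
}

static u_int
example_ts_referenced(mmu_t mmu, vm_page_t m)
{

        /* Count and clear the reference bits across all mappings of 'm'. */
        return (moea64_clear_bit(mmu, m, LPTE_REF));
}
#endif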

boolean_t
moea64_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size)
{
        struct pvo_entry *pvo;
        vm_offset_t ppa;
        int error = 0;

        PMAP_LOCK(kernel_pmap);
        for (ppa = pa & ~ADDR_POFF; ppa < pa + size; ppa += PAGE_SIZE) {
                pvo = moea64_pvo_find_va(kernel_pmap, ppa);
                if (pvo == NULL ||
                    (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) != ppa) {
                        error = EFAULT;
                        break;
                }
        }
        PMAP_UNLOCK(kernel_pmap);

        return (error);
}

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
moea64_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma)
{
        vm_offset_t va, tmpva, ppa, offset;

        ppa = trunc_page(pa);
        offset = pa & PAGE_MASK;
        size = roundup(offset + size, PAGE_SIZE);

        va = kmem_alloc_nofault(kernel_map, size);

        if (!va)
                panic("moea64_mapdev: Couldn't alloc kernel virtual memory");

        for (tmpva = va; size > 0;) {
                moea64_kenter_attr(mmu, tmpva, ppa, ma);
                size -= PAGE_SIZE;
                tmpva += PAGE_SIZE;
                ppa += PAGE_SIZE;
        }

        return ((void *)(va + offset));
}

void *
moea64_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size)
{

        return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT);
}

void
moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
{
        vm_offset_t base, offset;

        base = trunc_page(va);
        offset = va & PAGE_MASK;
        size = roundup(offset + size, PAGE_SIZE);

        kmem_free(kernel_map, base, size);
}

void
moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
{
        struct pvo_entry *pvo;
        vm_offset_t lim;
        vm_paddr_t pa;
        vm_size_t len;

        PMAP_LOCK(pm);
        while (sz > 0) {
                lim = round_page(va + 1);
                len = MIN(lim - va, sz);
                pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
                if (pvo != NULL && !(pvo->pvo_pte.lpte.pte_lo & LPTE_I)) {
                        pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) |
                            (va & ADDR_POFF);
                        moea64_syncicache(mmu, pm, va, pa, len);
                }
                va += len;
                sz -= len;
        }
        PMAP_UNLOCK(pm);
}
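
/*
 * Illustrative sketch (not compiled): the intended use of the device
 * mapping routines above, which back the machine-independent pmap_mapdev()
 * and pmap_unmapdev() interfaces.  The register window address is invented
 * for the example.
 */
#if 0
static void
example_map_device_registers(void)
{
        volatile uint32_t *regs;
        vm_size_t size = PAGE_SIZE;

        /* Map one page of device registers at a made-up physical address. */
        regs = (volatile uint32_t *)pmap_mapdev(0xf0001000, size);

        /* Accesses now go through the freshly allocated kernel VA. */
        (void)regs[0];

        /* Release the virtual address range when the device goes away. */
        pmap_unmapdev((vm_offset_t)regs, size);
}
#endif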