/*-
 * SPDX-License-Identifier: BSD-2-Clause AND BSD-4-Clause
 *
 * Copyright (c) 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
 * Copyright (C) 1995, 1996 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $
 */
/*-
 * Copyright (C) 2001 Benno Rice.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/*
 * Native 64-bit page table operations for running without a hypervisor.
 */
91 */ 92 93 #include <sys/param.h> 94 #include <sys/kernel.h> 95 #include <sys/ktr.h> 96 #include <sys/lock.h> 97 #include <sys/mutex.h> 98 #include <sys/proc.h> 99 #include <sys/sched.h> 100 #include <sys/sysctl.h> 101 #include <sys/systm.h> 102 #include <sys/rwlock.h> 103 #include <sys/endian.h> 104 105 #include <sys/kdb.h> 106 107 #include <vm/vm.h> 108 #include <vm/vm_param.h> 109 #include <vm/vm_kern.h> 110 #include <vm/vm_page.h> 111 #include <vm/vm_map.h> 112 #include <vm/vm_object.h> 113 #include <vm/vm_extern.h> 114 #include <vm/vm_pageout.h> 115 116 #include <machine/cpu.h> 117 #include <machine/hid.h> 118 #include <machine/md_var.h> 119 #include <machine/mmuvar.h> 120 121 #include "mmu_oea64.h" 122 123 #define PTESYNC() __asm __volatile("ptesync"); 124 #define TLBSYNC() __asm __volatile("tlbsync; ptesync"); 125 #define SYNC() __asm __volatile("sync"); 126 #define EIEIO() __asm __volatile("eieio"); 127 128 #define VSID_HASH_MASK 0x0000007fffffffffULL 129 130 /* POWER9 only permits a 64k partition table size. */ 131 #define PART_SIZE 0x10000 132 133 /* 134 * These values are derived from the POWER8 user manual Version 1.3 135 * (16-March-2016), 3.8.4 (large page support) and 3.8.16 (TLBIE Invalidate 136 * Entry instructions.) 
137 * 138 * Notably: 139 * 140 * + POWER8 supports an MPSS (Multple Page Sizes per Segment) configuration 141 * of 4KB base, 16MB actual page size 142 * + RB[56:58] encoding for 16MB page == 100, RB[54:55] segment either 00 or 01 143 * + RB[56:58] encoding for 4K page == 000, RB[54:55] segment either 00 or 01 144 */ 145 146 /* Actual page sizes (to be used with tlbie, when L=0) */ 147 #define AP_4K 0x00 148 #define AP_16M 0x80 149 150 #define LPTE_KERNEL_VSID_BIT (KERNEL_VSID_BIT << \ 151 (16 - (ADDR_API_SHFT64 - ADDR_PIDX_SHFT))) 152 153 /* Abbreviated Virtual Address Page - high bits */ 154 #define LPTE_AVA_PGNHI_MASK 0x0000000000000F80ULL 155 #define LPTE_AVA_PGNHI_SHIFT 7 156 157 /* Effective Address Page - low bits */ 158 #define EA_PAGELO_MASK 0x7ffULL 159 #define EA_PAGELO_SHIFT 11 160 161 static bool moea64_crop_tlbie; 162 static bool moea64_need_lock; 163 164 /* 165 * The tlbie instruction has two forms: an old one used by PowerISA 166 * 2.03 and prior, and a newer one used by PowerISA 2.06 and later. 167 * We need to support both. 
168 */ 169 static __inline void 170 TLBIE(uint64_t vpn, uint64_t oldptehi) 171 { 172 #ifndef __powerpc64__ 173 register_t vpn_hi, vpn_lo; 174 register_t msr; 175 register_t scratch, intr; 176 #endif 177 178 static volatile u_int tlbie_lock = 0; 179 bool need_lock = moea64_need_lock; 180 181 vpn <<= ADDR_PIDX_SHFT; 182 183 /* Hobo spinlock: we need stronger guarantees than mutexes provide */ 184 if (need_lock) { 185 while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); 186 isync(); /* Flush instruction queue once lock acquired */ 187 188 if (moea64_crop_tlbie) { 189 vpn &= ~(0xffffULL << 48); 190 #ifdef __powerpc64__ 191 if ((oldptehi & LPTE_BIG) != 0) 192 __asm __volatile("tlbie %0, 1" :: "r"(vpn) : 193 "memory"); 194 else 195 __asm __volatile("tlbie %0, 0" :: "r"(vpn) : 196 "memory"); 197 __asm __volatile("eieio; tlbsync; ptesync" ::: 198 "memory"); 199 tlbie_lock = 0; 200 return; 201 #endif 202 } 203 } 204 205 #ifdef __powerpc64__ 206 /* 207 * If this page has LPTE_BIG set and is from userspace, then 208 * it must be a superpage with 4KB base/16MB actual page size. 
209 */ 210 if ((oldptehi & LPTE_BIG) != 0 && 211 (oldptehi & LPTE_KERNEL_VSID_BIT) == 0) 212 vpn |= AP_16M; 213 214 __asm __volatile("tlbie %0, %1" :: "r"(vpn), "r"(0) : "memory"); 215 __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); 216 #else 217 vpn_hi = (uint32_t)(vpn >> 32); 218 vpn_lo = (uint32_t)vpn; 219 220 intr = intr_disable(); 221 __asm __volatile("\ 222 mfmsr %0; \ 223 mr %1, %0; \ 224 insrdi %1,%5,1,0; \ 225 mtmsrd %1; isync; \ 226 \ 227 sld %1,%2,%4; \ 228 or %1,%1,%3; \ 229 tlbie %1; \ 230 \ 231 mtmsrd %0; isync; \ 232 eieio; \ 233 tlbsync; \ 234 ptesync;" 235 : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) 236 : "memory"); 237 intr_restore(intr); 238 #endif 239 240 /* No barriers or special ops -- taken care of by ptesync above */ 241 if (need_lock) 242 tlbie_lock = 0; 243 } 244 245 #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) 246 #define ENABLE_TRANS(msr) mtmsr(msr) 247 248 /* 249 * PTEG data. 250 */ 251 static volatile struct lpte *moea64_pteg_table; 252 static struct rwlock moea64_eviction_lock; 253 254 static volatile struct pate *moea64_part_table; 255 256 /* 257 * Dump function. 258 */ 259 static void *moea64_dump_pmap_native(void *ctx, void *buf, 260 u_long *nbytes); 261 262 /* 263 * PTE calls. 264 */ 265 static int64_t moea64_pte_insert_native(struct pvo_entry *); 266 static int64_t moea64_pte_synch_native(struct pvo_entry *); 267 static int64_t moea64_pte_clear_native(struct pvo_entry *, uint64_t); 268 static int64_t moea64_pte_replace_native(struct pvo_entry *, int); 269 static int64_t moea64_pte_unset_native(struct pvo_entry *); 270 static int64_t moea64_pte_insert_sp_native(struct pvo_entry *); 271 static int64_t moea64_pte_unset_sp_native(struct pvo_entry *); 272 static int64_t moea64_pte_replace_sp_native(struct pvo_entry *); 273 274 /* 275 * Utility routines. 

/*
 * Utility routines.
 */
static void	moea64_bootstrap_native(
		    vm_offset_t kernelstart, vm_offset_t kernelend);
static void	moea64_cpu_bootstrap_native(int ap);
static void	tlbia(void);
static void	moea64_install_native(void);

/* pmap method table handed to MMU_DEF_INHERIT() below. */
static struct pmap_funcs moea64_native_methods = {
	.install = moea64_install_native,

	/* Internal interfaces */
	.bootstrap = moea64_bootstrap_native,
	.cpu_bootstrap = moea64_cpu_bootstrap_native,
	.dumpsys_dump_pmap = moea64_dump_pmap_native,
};

/* PTE operation table; installed into moea64_ops by moea64_install_native(). */
static struct moea64_funcs moea64_native_funcs = {
	.pte_synch = moea64_pte_synch_native,
	.pte_clear = moea64_pte_clear_native,
	.pte_unset = moea64_pte_unset_native,
	.pte_replace = moea64_pte_replace_native,
	.pte_insert = moea64_pte_insert_native,
	.pte_insert_sp = moea64_pte_insert_sp_native,
	.pte_unset_sp = moea64_pte_unset_sp_native,
	.pte_replace_sp = moea64_pte_replace_sp_native,
};

MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, oea64_mmu);

/*
 * Point moea64_ops at the native PTE operations, then run the common
 * moea64_install().
 */
static void
moea64_install_native(void)
{

	/* Install the MOEA64 ops. */
	moea64_ops = &moea64_native_funcs;

	moea64_install();
}

/*
 * Read the REF/CHG bits of the PTE backing 'pvo'.
 *
 * Returns the PTE's (LPTE_REF | LPTE_CHG) bits, or -1 if the slot recorded
 * in the pvo no longer holds this mapping (AVPN mismatch, i.e. the entry
 * was evicted).  The pmap lock must be held by the caller.
 */
static int64_t
moea64_pte_synch_native(struct pvo_entry *pvo)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	uint64_t ptelo, pvo_ptevpn;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);

	/* Shared hold on the eviction lock keeps the slot from being reused. */
	rw_rlock(&moea64_eviction_lock);
	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
		/* Evicted */
		rw_runlock(&moea64_eviction_lock);
		return (-1);
	}

	PTESYNC();
	ptelo = be64toh(pt->pte_lo);

	rw_runlock(&moea64_eviction_lock);

	return (ptelo & (LPTE_REF | LPTE_CHG));
}

/*
 * Clear 'ptebit' in the PTE backing 'pvo'.
 *
 * For LPTE_REF the bit is cleared in place with a one-byte store followed
 * by a TLBIE.  For any other bit the PTE is removed and re-inserted from
 * the pvo.  Returns the (LPTE_REF | LPTE_CHG) bits read before the change,
 * or -1 if the entry had already been evicted.  The pmap lock must be held.
 */
static int64_t
moea64_pte_clear_native(struct pvo_entry *pvo, uint64_t ptebit)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	struct lpte properpt;
	uint64_t ptelo;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	/* Host-endian image of what the PTE should contain. */
	moea64_pte_from_pvo(pvo, &properpt);

	rw_rlock(&moea64_eviction_lock);
	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) !=
	    (properpt.pte_hi & LPTE_AVPN_MASK)) {
		/* Evicted */
		rw_runlock(&moea64_eviction_lock);
		return (-1);
	}

	if (ptebit == LPTE_REF) {
		/* See "Resetting the Reference Bit" in arch manual */
		PTESYNC();
		/* 2-step here safe: precision is not guaranteed */
		ptelo = be64toh(pt->pte_lo);

		/* One-byte store to avoid touching the C bit */
		/*
		 * The table is stored big-endian, so the destination is
		 * always byte 6; the matching byte of the host-endian
		 * properpt image is byte 6 on big-endian hosts and byte 1
		 * on little-endian hosts.
		 */
		((volatile uint8_t *)(&pt->pte_lo))[6] =
#if BYTE_ORDER == BIG_ENDIAN
		    ((uint8_t *)(&properpt.pte_lo))[6];
#else
		    ((uint8_t *)(&properpt.pte_lo))[1];
#endif
		rw_runlock(&moea64_eviction_lock);

		critical_enter();
		TLBIE(pvo->pvo_vpn, properpt.pte_hi);
		critical_exit();
	} else {
		rw_runlock(&moea64_eviction_lock);
		/*
		 * NOTE(review): if moea64_pte_unset_native() returns -1
		 * here (evicted after the lock was dropped), the mask below
		 * reports both REF and CHG as set -- presumably intended as
		 * the conservative answer; confirm.
		 */
		ptelo = moea64_pte_unset_native(pvo);
		moea64_pte_insert_native(pvo);
	}

	return (ptelo & (LPTE_REF | LPTE_CHG));
}

/*
 * Invalidate the PTE at 'pt' and return its (LPTE_CHG | LPTE_REF) bits.
 * 'vpn' is the virtual page number passed to TLBIE().  Callers in this
 * file hold moea64_eviction_lock around the call.
 */
static __always_inline int64_t
moea64_pte_unset_locked(volatile struct lpte *pt, uint64_t vpn)
{
	uint64_t ptelo, ptehi;

	/*
	 * Invalidate the pte, briefly locking it to collect RC bits. No
	 * atomics needed since this is protected against eviction by the lock.
	 */
	isync();
	critical_enter();
	ptehi = (be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED;
	pt->pte_hi = htobe64(ptehi);
	PTESYNC();
	TLBIE(vpn, ptehi);
	ptelo = be64toh(pt->pte_lo);
	/* Zero the second 32-bit word of pte_hi. */
	*((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */
	critical_exit();

	/* Keep statistics */
	STAT_MOEA64(moea64_pte_valid--);

	return (ptelo & (LPTE_CHG | LPTE_REF));
}

/*
 * Remove the PTE backing 'pvo'.  Returns the entry's REF/CHG bits, or -1
 * if the slot no longer holds this mapping (it was evicted).
 */
static int64_t
moea64_pte_unset_native(struct pvo_entry *pvo)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	int64_t ret;
	uint64_t pvo_ptevpn;

	pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);

	rw_rlock(&moea64_eviction_lock);

	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
		/* Evicted */
		STAT_MOEA64(moea64_pte_overflow--);
		ret = -1;
	} else
		ret = moea64_pte_unset_locked(pt, pvo->pvo_vpn);

	rw_runlock(&moea64_eviction_lock);

	return (ret);
}

/*
 * Replace the PTE at 'pt' with a fresh image built from 'pvo',
 * invalidating the old translation first.  Returns the old entry's
 * REF/CHG bits, or -1 if the entry had been evicted.
 */
static int64_t
moea64_pte_replace_inval_native(struct pvo_entry *pvo,
    volatile struct lpte *pt)
{
	struct lpte properpt;
	uint64_t ptelo, ptehi;

	moea64_pte_from_pvo(pvo, &properpt);

	rw_rlock(&moea64_eviction_lock);
	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) !=
	    (properpt.pte_hi & LPTE_AVPN_MASK)) {
		/* Evicted */
		STAT_MOEA64(moea64_pte_overflow--);
		rw_runlock(&moea64_eviction_lock);
		return (-1);
	}

	/*
	 * Replace the pte, briefly locking it to collect RC bits. No
	 * atomics needed since this is protected against eviction by the lock.
	 */
	isync();
	critical_enter();
	ptehi = (be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED;
	pt->pte_hi = htobe64(ptehi);
	PTESYNC();
	TLBIE(pvo->pvo_vpn, ptehi);
	ptelo = be64toh(pt->pte_lo);
	EIEIO();
	/* Low doubleword first; the high doubleword is written last. */
	pt->pte_lo = htobe64(properpt.pte_lo);
	EIEIO();
	pt->pte_hi = htobe64(properpt.pte_hi); /* Release lock */
	PTESYNC();
	critical_exit();
	rw_runlock(&moea64_eviction_lock);

	return (ptelo & (LPTE_CHG | LPTE_REF));
}

/*
 * Update the PTE backing 'pvo' after the pvo changed.
 *
 * flags == 0: only pte_hi is rewritten in place, with no invalidation.
 * Otherwise the entry is invalidated and rewritten via
 * moea64_pte_replace_inval_native().  Returns -1 if the entry had been
 * evicted.
 */
static int64_t
moea64_pte_replace_native(struct pvo_entry *pvo, int flags)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	struct lpte properpt;
	int64_t ptelo;

	if (flags == 0) {
		/* Just some software bits changing. */
		moea64_pte_from_pvo(pvo, &properpt);

		rw_rlock(&moea64_eviction_lock);
		if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) !=
		    (properpt.pte_hi & LPTE_AVPN_MASK)) {
			rw_runlock(&moea64_eviction_lock);
			return (-1);
		}
		pt->pte_hi = htobe64(properpt.pte_hi);
		/*
		 * NOTE(review): this path returns the whole pte_lo, not
		 * masked to LPTE_REF | LPTE_CHG like the invalidating path
		 * -- confirm callers only test those bits and the sign.
		 */
		ptelo = be64toh(pt->pte_lo);
		rw_runlock(&moea64_eviction_lock);
	} else {
		/* Otherwise, need reinsertion and deletion */
		ptelo = moea64_pte_replace_inval_native(pvo, pt);
	}

	return (ptelo);
}

/*
 * Per-CPU MMU setup: turn translation off, load the kernel segment
 * entries, point the CPU at the page table (or partition table on
 * ISA 3.0), then flush the TLB.  'ap' is not referenced in this function.
 */
static void
moea64_cpu_bootstrap_native(int ap)
{
	int i = 0;
#ifdef __powerpc64__
	struct slb *slb = PCPU_GET(aim.slb);
	register_t seg0;
#endif

	/*
	 * Initialize segment registers and MMU
	 */

	mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR);

	switch(mfpvr() >> 16) {
	case IBMPOWER9:
		/* Clear HID0_RADIX on POWER9. */
		mtspr(SPR_HID0, mfspr(SPR_HID0) & ~HID0_RADIX);
		break;
	}

	/*
	 * Install kernel SLB entries
	 */

#ifdef __powerpc64__
	__asm __volatile ("slbia");
	__asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) :
	    "r"(0));

	for (i = 0; i < n_slbs; i++) {
		if (!(slb[i].slbe & SLBE_VALID))
			continue;

		__asm __volatile ("slbmte %0, %1" ::
		    "r"(slb[i].slbv), "r"(slb[i].slbe));
	}
#else
	for (i = 0; i < 16; i++)
		mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]);
#endif

	/*
	 * Install page table
	 */

	/*
	 * The register value is the table address with DMAP_BASE_ADDRESS
	 * masked off, ORed with a size field computed by flsl().
	 */
	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
		mtspr(SPR_PTCR,
		    ((uintptr_t)moea64_part_table & ~DMAP_BASE_ADDRESS) |
		     flsl((PART_SIZE >> 12) - 1));
	else
		__asm __volatile ("ptesync; mtsdr1 %0; isync"
		    :: "r"(((uintptr_t)moea64_pteg_table & ~DMAP_BASE_ADDRESS)
			     | (uintptr_t)(flsl(moea64_pteg_mask >> 11))));
	tlbia();
}

/*
 * Boot-time pmap setup for the native page table: choose the TLBIE()
 * behaviour flags from the processor version, allocate and zero the PTEG
 * table (plus the partition table on ISA 3.0), and run the common
 * early/mid/late moea64 bootstrap stages around that.
 */
static void
moea64_bootstrap_native(vm_offset_t kernelstart, vm_offset_t kernelend)
{
	vm_size_t	size;
	vm_offset_t	off;
	vm_paddr_t	pa;
	register_t	msr;

	moea64_early_bootstrap(kernelstart, kernelend);

	switch (mfpvr() >> 16) {
	case IBMPOWER8:
	case IBMPOWER8E:
	case IBMPOWER8NVL:
	case IBMPOWER9:
	case IBMPOWER10:
	case IBMPOWER11:
		moea64_need_lock = false;
		break;
	case IBMPOWER4:
	case IBMPOWER4PLUS:
	case IBM970:
	case IBM970FX:
	case IBM970GX:
	case IBM970MP:
		moea64_crop_tlbie = true;
		/* FALLTHROUGH: these CPUs also take the tlbie lock. */
	default:
		moea64_need_lock = true;
	}
	/*
	 * Allocate PTEG table.
	 */

	size = moea64_pteg_count * sizeof(struct lpteg);
	CTR2(KTR_PMAP, "moea64_bootstrap: %lu PTEGs, %lu bytes",
	    moea64_pteg_count, size);
	rw_init(&moea64_eviction_lock, "pte eviction");

	/*
	 * We now need to allocate memory. This memory, to be allocated,
	 * has to reside in a page table. The page table we are about to
	 * allocate. We don't have BAT. So drop to data real mode for a minute
	 * as a measure of last resort. We do this a couple times.
	 */
	/*
	 * PTEG table must be aligned on a 256k boundary, but can be placed
	 * anywhere with that alignment on POWER ISA 3+ systems. On earlier
	 * systems, offset addition is done by the CPU with bitwise OR rather
	 * than addition, so the table must also be aligned on a boundary of
	 * its own size. Pick the larger of the two, which works on all
	 * systems.
	 */
	moea64_pteg_table = (struct lpte *)moea64_bootstrap_alloc(size,
	    MAX(256*1024, size));
	if (hw_direct_map)
		moea64_pteg_table =
		    (struct lpte *)PHYS_TO_DMAP((vm_offset_t)moea64_pteg_table);
	/* Allocate partition table (ISA 3.0). */
	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) {
		moea64_part_table =
		    (struct pate *)moea64_bootstrap_alloc(PART_SIZE, PART_SIZE);
		moea64_part_table =
		    (struct pate *)PHYS_TO_DMAP((vm_offset_t)moea64_part_table);
	}
	/* Zero the new tables with data translation switched off. */
	DISABLE_TRANS(msr);
	bzero(__DEVOLATILE(void *, moea64_pteg_table), moea64_pteg_count *
	    sizeof(struct lpteg));
	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) {
		bzero(__DEVOLATILE(void *, moea64_part_table), PART_SIZE);
		/* Entry 0: physical address of the PTEG table plus a size field. */
		moea64_part_table[0].pagetab = htobe64(
			(DMAP_TO_PHYS((vm_offset_t)moea64_pteg_table)) |
			(uintptr_t)(flsl((moea64_pteg_count - 1) >> 11)));
	}
	ENABLE_TRANS(msr);

	CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table);

	moea64_mid_bootstrap(kernelstart, kernelend);

	/*
	 * Add a mapping for the page table itself if there is no direct map.
	 */
	if (!hw_direct_map) {
		size = moea64_pteg_count * sizeof(struct lpteg);
		off = (vm_offset_t)(moea64_pteg_table);
		DISABLE_TRANS(msr);
		/* Identity mappings: va == pa for every page of the table. */
		for (pa = off; pa < off + size; pa += PAGE_SIZE)
			pmap_kenter(pa, pa);
		ENABLE_TRANS(msr);
	}

	/* Bring up virtual memory */
	moea64_late_bootstrap(kernelstart, kernelend);
}

/*
 * Flush the local TLB by issuing tlbiel for every operand value from the
 * starting value up to 0x400000 in steps of 0x1000, bracketed by
 * tlbsync/ptesync.  The starting value is 0xc00, or 0 on the listed
 * POWER4/POWER5/970 processors.
 */
static void
tlbia(void)
{
	vm_offset_t i;
	#ifndef __powerpc64__
	register_t msr, scratch;
	#endif

	i = 0xc00; /* IS = 11 */
	switch (mfpvr() >> 16) {
	case IBM970:
	case IBM970FX:
	case IBM970MP:
	case IBM970GX:
	case IBMPOWER4:
	case IBMPOWER4PLUS:
	case IBMPOWER5:
	case IBMPOWER5PLUS:
		i = 0; /* IS not supported */
		break;
	}

	TLBSYNC();

	for (; i < 0x400000; i += 0x00001000) {
		#ifdef __powerpc64__
		__asm __volatile("tlbiel %0" :: "r"(i));
		#else
		/* Same MSR save/modify/restore dance as the 32-bit TLBIE(). */
		__asm __volatile("\
		    mfmsr %0; \
		    mr %1, %0; \
		    insrdi %1,%3,1,0; \
		    mtmsrd %1; \
		    isync; \
		    \
		    tlbiel %2; \
		    \
		    mtmsrd %0; \
		    isync;"
		: "=r"(msr), "=r"(scratch) : "r"(i), "r"(1));
		#endif
	}

	EIEIO();
	TLBSYNC();
}

/*
 * Atomically seize the PTE at 'pte' unless any bit of 'bitmask' is set in
 * its high doubleword.
 *
 * On success the stored value is replaced by LPTE_LOCKED alone and 1 is
 * returned; if a masked bit is set, nothing is changed and 0 is returned.
 * Either way *oldhi receives the value that was loaded (host byte order).
 */
static int
atomic_pte_lock(volatile struct lpte *pte, uint64_t bitmask, uint64_t *oldhi)
{
	int	ret;
#ifdef __powerpc64__
	uint64_t temp;
#else
	uint32_t oldhihalf;
#endif

	/*
	 * Note: in principle, if just the locked bit were set here, we
	 * could avoid needing the eviction lock. However, eviction occurs
	 * so rarely that it isn't worth bothering about in practice.
	 */
#ifdef __powerpc64__
	/*
	 * Note: Success of this sequence has the side effect of invalidating
	 * the PTE, as we are setting it to LPTE_LOCKED and discarding the
	 * other bits, including LPTE_V.
	 */
	/* Mask and new value are byte-swapped to match the in-memory PTE. */
	__asm __volatile (
		"1:\tldarx %1, 0, %3\n\t"	/* load old value */
		"and. %0,%1,%4\n\t"		/* check if any bits set */
		"bne 2f\n\t"			/* exit if any set */
		"stdcx. %5, 0, %3\n\t"		/* attempt to store */
		"bne- 1b\n\t"			/* spin if failed */
		"li %0, 1\n\t"			/* success - retval = 1 */
		"b 3f\n\t"			/* we've succeeded */
		"2:\n\t"
		"stdcx. %1, 0, %3\n\t"		/* clear reservation (74xx) */
		"li %0, 0\n\t"			/* failure - retval = 0 */
		"3:\n\t"
		: "=&r" (ret), "=&r"(temp), "=m" (pte->pte_hi)
		: "r" ((volatile char *)&pte->pte_hi),
		  "r" (htobe64(bitmask)), "r" (htobe64(LPTE_LOCKED)),
		  "m" (pte->pte_hi)
		: "cr0", "cr1", "cr2", "memory");
	*oldhi = be64toh(temp);
#else
	/*
	 * This code is used on bridge mode only.
	 */
	/* Operates on the 32-bit word at byte offset 4 of pte_hi only. */
	__asm __volatile (
		"1:\tlwarx %1, 0, %3\n\t"	/* load old value */
		"and. %0,%1,%4\n\t"		/* check if any bits set */
		"bne 2f\n\t"			/* exit if any set */
		"stwcx. %5, 0, %3\n\t"		/* attempt to store */
		"bne- 1b\n\t"			/* spin if failed */
		"li %0, 1\n\t"			/* success - retval = 1 */
		"b 3f\n\t"			/* we've succeeded */
		"2:\n\t"
		"stwcx. %1, 0, %3\n\t"		/* clear reservation (74xx) */
		"li %0, 0\n\t"			/* failure - retval = 0 */
		"3:\n\t"
		: "=&r" (ret), "=&r"(oldhihalf), "=m" (pte->pte_hi)
		: "r" ((volatile char *)&pte->pte_hi + 4),
		  "r" ((uint32_t)bitmask), "r" ((uint32_t)LPTE_LOCKED),
		  "m" (pte->pte_hi)
		: "cr0", "cr1", "cr2", "memory");

	*oldhi = (pte->pte_hi & 0xffffffff00000000ULL) | oldhihalf;
#endif

	return (ret);
}

/*
 * Insert 'pvo_pt' into one of the eight slots of the PTEG that starts at
 * 'slotbase'.
 *
 * A slot is taken only if none of the bits in 'mask' are set in its
 * current pte_hi; a valid entry that passes the mask is invalidated
 * (evicted) first.  Returns the index of the slot used, or (uintptr_t)-1
 * if no slot in the group could be taken.
 */
static uintptr_t
moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase,
    uint64_t mask)
{
	volatile struct lpte *pt;
	uint64_t oldptehi, va;
	uintptr_t k;
	int i, j;

	/* Start at a random slot */
	i = mftb() % 8;
	for (j = 0; j < 8; j++) {
		k = slotbase + (i + j) % 8;
		pt = &moea64_pteg_table[k];
		/* Invalidate and seize lock only if no bits in mask set */
		if (atomic_pte_lock(pt, mask, &oldptehi)) /* Lock obtained */
			break;
	}

	if (j == 8)
		return (-1);

	if (oldptehi & LPTE_VALID) {
		KASSERT(!(oldptehi & LPTE_WIRED), ("Unmapped wired entry"));
		/*
		 * Need to invalidate old entry completely: see
		 * "Modifying a Page Table Entry". Need to reconstruct
		 * the virtual address for the outgoing entry to do that.
		 */
		va = oldptehi >> (ADDR_SR_SHFT - ADDR_API_SHFT64);
		/* k >> 3 is the PTEG index; undo the secondary hash if used. */
		if (oldptehi & LPTE_HID)
			va = (((k >> 3) ^ moea64_pteg_mask) ^ va) &
			    (ADDR_PIDX >> ADDR_PIDX_SHFT);
		else
			va = ((k >> 3) ^ va) & (ADDR_PIDX >> ADDR_PIDX_SHFT);
		va |= (oldptehi & LPTE_AVPN_MASK) <<
		    (ADDR_API_SHFT64 - ADDR_PIDX_SHFT);
		PTESYNC();
		TLBIE(va, oldptehi);
		STAT_MOEA64(moea64_pte_valid--);
		STAT_MOEA64(moea64_pte_overflow++);
	}

	/*
	 * Update the PTE as per "Adding a Page Table Entry". Lock is released
	 * by setting the high doubleword.
	 */
	pt->pte_lo = htobe64(pvo_pt->pte_lo);
	EIEIO();
	pt->pte_hi = htobe64(pvo_pt->pte_hi);
	PTESYNC();

	/* Keep statistics */
	STAT_MOEA64(moea64_pte_valid++);

	return (k);
}
814 */ 815 pt->pte_lo = htobe64(pvo_pt->pte_lo); 816 EIEIO(); 817 pt->pte_hi = htobe64(pvo_pt->pte_hi); 818 PTESYNC(); 819 820 /* Keep statistics */ 821 STAT_MOEA64(moea64_pte_valid++); 822 823 return (k); 824 } 825 826 static __always_inline int64_t 827 moea64_pte_insert_locked(struct pvo_entry *pvo, struct lpte *insertpt, 828 uint64_t mask) 829 { 830 uintptr_t slot; 831 832 /* 833 * First try primary hash. 834 */ 835 slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot, 836 mask | LPTE_WIRED | LPTE_LOCKED); 837 if (slot != -1) { 838 pvo->pvo_pte.slot = slot; 839 return (0); 840 } 841 842 /* 843 * Now try secondary hash. 844 */ 845 pvo->pvo_vaddr ^= PVO_HID; 846 insertpt->pte_hi ^= LPTE_HID; 847 pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); 848 slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot, 849 mask | LPTE_WIRED | LPTE_LOCKED); 850 if (slot != -1) { 851 pvo->pvo_pte.slot = slot; 852 return (0); 853 } 854 855 return (-1); 856 } 857 858 static int64_t 859 moea64_pte_insert_native(struct pvo_entry *pvo) 860 { 861 struct lpte insertpt; 862 int64_t ret; 863 864 /* Initialize PTE */ 865 moea64_pte_from_pvo(pvo, &insertpt); 866 867 /* Make sure further insertion is locked out during evictions */ 868 rw_rlock(&moea64_eviction_lock); 869 870 pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ 871 ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID); 872 if (ret == -1) { 873 /* 874 * Out of luck. Find a PTE to sacrifice. 875 */ 876 877 /* Lock out all insertions for a bit */ 878 if (!rw_try_upgrade(&moea64_eviction_lock)) { 879 rw_runlock(&moea64_eviction_lock); 880 rw_wlock(&moea64_eviction_lock); 881 } 882 /* Don't evict large pages */ 883 ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_BIG); 884 rw_wunlock(&moea64_eviction_lock); 885 /* No freeable slots in either PTEG? We're hosed. 
*/ 886 if (ret == -1) 887 panic("moea64_pte_insert: overflow"); 888 } else 889 rw_runlock(&moea64_eviction_lock); 890 891 return (0); 892 } 893 894 static void * 895 moea64_dump_pmap_native(void *ctx, void *buf, u_long *nbytes) 896 { 897 struct dump_context *dctx; 898 u_long ptex, ptex_end; 899 900 dctx = (struct dump_context *)ctx; 901 ptex = dctx->ptex; 902 ptex_end = ptex + dctx->blksz / sizeof(struct lpte); 903 ptex_end = MIN(ptex_end, dctx->ptex_end); 904 *nbytes = (ptex_end - ptex) * sizeof(struct lpte); 905 906 if (*nbytes == 0) 907 return (NULL); 908 909 dctx->ptex = ptex_end; 910 return (__DEVOLATILE(struct lpte *, moea64_pteg_table) + ptex); 911 } 912 913 static __always_inline uint64_t 914 moea64_vpn_from_pte(uint64_t ptehi, uintptr_t slot) 915 { 916 uint64_t pgn, pgnlo, vsid; 917 918 vsid = (ptehi & LPTE_AVA_MASK) >> LPTE_VSID_SHIFT; 919 if ((ptehi & LPTE_HID) != 0) 920 slot ^= (moea64_pteg_mask << 3); 921 pgnlo = ((vsid & VSID_HASH_MASK) ^ (slot >> 3)) & EA_PAGELO_MASK; 922 pgn = ((ptehi & LPTE_AVA_PGNHI_MASK) << (EA_PAGELO_SHIFT - 923 LPTE_AVA_PGNHI_SHIFT)) | pgnlo; 924 return ((vsid << 16) | pgn); 925 } 926 927 static __always_inline int64_t 928 moea64_pte_unset_sp_locked(struct pvo_entry *pvo) 929 { 930 volatile struct lpte *pt; 931 uint64_t ptehi, refchg, vpn; 932 vm_offset_t eva; 933 934 refchg = 0; 935 eva = PVO_VADDR(pvo) + HPT_SP_SIZE; 936 937 for (; pvo != NULL && PVO_VADDR(pvo) < eva; 938 pvo = RB_NEXT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo)) { 939 pt = moea64_pteg_table + pvo->pvo_pte.slot; 940 ptehi = be64toh(pt->pte_hi); 941 if ((ptehi & LPTE_AVPN_MASK) != 942 moea64_pte_vpn_from_pvo_vpn(pvo)) { 943 /* Evicted: invalidate new entry */ 944 STAT_MOEA64(moea64_pte_overflow--); 945 vpn = moea64_vpn_from_pte(ptehi, pvo->pvo_pte.slot); 946 CTR1(KTR_PMAP, "Evicted page in pte_unset_sp: vpn=%jx", 947 (uintmax_t)vpn); 948 /* Assume evicted page was modified */ 949 refchg |= LPTE_CHG; 950 } else 951 vpn = pvo->pvo_vpn; 952 953 refchg |= 
moea64_pte_unset_locked(pt, vpn); 954 } 955 956 return (refchg); 957 } 958 959 static int64_t 960 moea64_pte_unset_sp_native(struct pvo_entry *pvo) 961 { 962 uint64_t refchg; 963 964 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); 965 KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, 966 ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); 967 968 rw_rlock(&moea64_eviction_lock); 969 refchg = moea64_pte_unset_sp_locked(pvo); 970 rw_runlock(&moea64_eviction_lock); 971 972 return (refchg); 973 } 974 975 static __always_inline int64_t 976 moea64_pte_insert_sp_locked(struct pvo_entry *pvo) 977 { 978 struct lpte insertpt; 979 int64_t ret; 980 vm_offset_t eva; 981 982 eva = PVO_VADDR(pvo) + HPT_SP_SIZE; 983 984 for (; pvo != NULL && PVO_VADDR(pvo) < eva; 985 pvo = RB_NEXT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo)) { 986 moea64_pte_from_pvo(pvo, &insertpt); 987 pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ 988 989 ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID); 990 if (ret == -1) { 991 /* Lock out all insertions for a bit */ 992 if (!rw_try_upgrade(&moea64_eviction_lock)) { 993 rw_runlock(&moea64_eviction_lock); 994 rw_wlock(&moea64_eviction_lock); 995 } 996 /* Don't evict large pages */ 997 ret = moea64_pte_insert_locked(pvo, &insertpt, 998 LPTE_BIG); 999 rw_downgrade(&moea64_eviction_lock); 1000 /* No freeable slots in either PTEG? We're hosed. 
*/ 1001 if (ret == -1) 1002 panic("moea64_pte_insert_sp: overflow"); 1003 } 1004 } 1005 1006 return (0); 1007 } 1008 1009 static int64_t 1010 moea64_pte_insert_sp_native(struct pvo_entry *pvo) 1011 { 1012 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); 1013 KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, 1014 ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); 1015 1016 rw_rlock(&moea64_eviction_lock); 1017 moea64_pte_insert_sp_locked(pvo); 1018 rw_runlock(&moea64_eviction_lock); 1019 1020 return (0); 1021 } 1022 1023 static int64_t 1024 moea64_pte_replace_sp_native(struct pvo_entry *pvo) 1025 { 1026 uint64_t refchg; 1027 1028 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); 1029 KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, 1030 ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); 1031 1032 rw_rlock(&moea64_eviction_lock); 1033 refchg = moea64_pte_unset_sp_locked(pvo); 1034 moea64_pte_insert_sp_locked(pvo); 1035 rw_runlock(&moea64_eviction_lock); 1036 1037 return (refchg); 1038 } 1039