1 /* 2 * pSeries_lpar.c 3 * Copyright (C) 2001 Todd Inglett, IBM Corporation 4 * 5 * pSeries LPAR support. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22 /* Enables debugging of low-level hash table routines - careful! */ 23 #undef DEBUG 24 25 #include <linux/kernel.h> 26 #include <linux/dma-mapping.h> 27 #include <linux/console.h> 28 #include <linux/export.h> 29 #include <asm/processor.h> 30 #include <asm/mmu.h> 31 #include <asm/page.h> 32 #include <asm/pgtable.h> 33 #include <asm/machdep.h> 34 #include <asm/abs_addr.h> 35 #include <asm/mmu_context.h> 36 #include <asm/iommu.h> 37 #include <asm/tlbflush.h> 38 #include <asm/tlb.h> 39 #include <asm/prom.h> 40 #include <asm/cputable.h> 41 #include <asm/udbg.h> 42 #include <asm/smp.h> 43 #include <asm/trace.h> 44 45 #include "plpar_wrappers.h" 46 #include "pseries.h" 47 48 49 /* in hvCall.S */ 50 EXPORT_SYMBOL(plpar_hcall); 51 EXPORT_SYMBOL(plpar_hcall9); 52 EXPORT_SYMBOL(plpar_hcall_norets); 53 54 extern void pSeries_find_serial_port(void); 55 56 void vpa_init(int cpu) 57 { 58 int hwcpu = get_hard_smp_processor_id(cpu); 59 unsigned long addr; 60 long ret; 61 struct paca_struct *pp; 62 struct dtl_entry *dtl; 63 64 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 65 lppaca_of(cpu).vmxregs_in_use = 1; 66 67 addr = __pa(&lppaca_of(cpu)); 68 ret = register_vpa(hwcpu, addr); 69 70 if (ret) { 71 pr_err("WARNING: VPA registration for cpu %d (hw %d) of area " 72 "%lx failed with %ld\n", cpu, hwcpu, addr, ret); 73 return; 74 } 75 /* 76 * PAPR says this feature is SLB-Buffer but firmware never 77 * reports that. All SPLPAR support SLB shadow buffer. 78 */ 79 addr = __pa(&slb_shadow[cpu]); 80 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 81 ret = register_slb_shadow(hwcpu, addr); 82 if (ret) 83 pr_err("WARNING: SLB shadow buffer registration for " 84 "cpu %d (hw %d) of area %lx failed with %ld\n", 85 cpu, hwcpu, addr, ret); 86 } 87 88 /* 89 * Register dispatch trace log, if one has been allocated. 90 */ 91 pp = &paca[cpu]; 92 dtl = pp->dispatch_log; 93 if (dtl) { 94 pp->dtl_ridx = 0; 95 pp->dtl_curr = dtl; 96 lppaca_of(cpu).dtl_idx = 0; 97 98 /* hypervisor reads buffer length from this field */ 99 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; 100 ret = register_dtl(hwcpu, __pa(dtl)); 101 if (ret) 102 pr_err("WARNING: DTL registration of cpu %d (hw %d) " 103 "failed with %ld\n", smp_processor_id(), 104 hwcpu, ret); 105 lppaca_of(cpu).dtl_enable_mask = 2; 106 } 107 } 108 109 static long pSeries_lpar_hpte_insert(unsigned long hpte_group, 110 unsigned long va, unsigned long pa, 111 unsigned long rflags, unsigned long vflags, 112 int psize, int ssize) 113 { 114 unsigned long lpar_rc; 115 unsigned long flags; 116 unsigned long slot; 117 unsigned long hpte_v, hpte_r; 118 119 if (!(vflags & HPTE_V_BOLTED)) 120 pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " 121 "rflags=%lx, vflags=%lx, psize=%d)\n", 122 hpte_group, va, pa, rflags, vflags, psize); 123 124 hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; 125 hpte_r = hpte_encode_r(pa, psize) | rflags; 126 127 if (!(vflags & HPTE_V_BOLTED)) 128 pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); 129 130 /* Now fill in the actual HPTE */ 131 /* Set CEC cookie to 0 */ 132 /* Zero page = 0 */ 133 /* I-cache Invalidate = 0 */ 134 /* I-cache synchronize = 0 */ 135 /* Exact = 0 */ 136 flags = 0; 137 138 /* Make pHyp happy */ 139 if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) 140 hpte_r &= ~_PAGE_COHERENT; 141 if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) 142 flags |= H_COALESCE_CAND; 143 144 lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); 145 if (unlikely(lpar_rc == H_PTEG_FULL)) { 146 if (!(vflags & HPTE_V_BOLTED)) 147 pr_devel(" full\n"); 148 return -1; 149 } 150 151 /* 152 * Since we try and ioremap PHBs we don't own, the pte insert 153 * will fail. However we must catch the failure in hash_page 154 * or we will loop forever, so return -2 in this case. 155 */ 156 if (unlikely(lpar_rc != H_SUCCESS)) { 157 if (!(vflags & HPTE_V_BOLTED)) 158 pr_devel(" lpar err %lu\n", lpar_rc); 159 return -2; 160 } 161 if (!(vflags & HPTE_V_BOLTED)) 162 pr_devel(" -> slot: %lu\n", slot & 7); 163 164 /* Because of iSeries, we have to pass down the secondary 165 * bucket bit here as well 166 */ 167 return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); 168 } 169 170 static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); 171 172 static long pSeries_lpar_hpte_remove(unsigned long hpte_group) 173 { 174 unsigned long slot_offset; 175 unsigned long lpar_rc; 176 int i; 177 unsigned long dummy1, dummy2; 178 179 /* pick a random slot to start at */ 180 slot_offset = mftb() & 0x7; 181 182 for (i = 0; i < HPTES_PER_GROUP; i++) { 183 184 /* don't remove a bolted entry */ 185 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, 186 (0x1UL << 4), &dummy1, &dummy2); 187 if (lpar_rc == H_SUCCESS) 188 return i; 189 BUG_ON(lpar_rc != H_NOT_FOUND); 190 191 slot_offset++; 192 slot_offset &= 0x7; 193 } 194 195 return -1; 196 } 197 198 static void pSeries_lpar_hptab_clear(void) 199 { 200 unsigned long size_bytes = 1UL << ppc64_pft_size; 201 unsigned long hpte_count = size_bytes >> 4; 202 struct { 203 unsigned long pteh; 204 unsigned long ptel; 205 } ptes[4]; 206 long lpar_rc; 207 unsigned long i, j; 208 209 /* Read in batches of 4, 210 * invalidate only valid entries not in the VRMA 211 * hpte_count will be a multiple of 4 212 */ 213 for (i = 0; i < hpte_count; i += 4) { 214 lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); 215 if (lpar_rc != H_SUCCESS) 216 continue; 217 for (j = 0; j < 4; j++){ 218 if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == 219 HPTE_V_VRMA_MASK) 220 continue; 221 if (ptes[j].pteh & HPTE_V_VALID) 222 plpar_pte_remove_raw(0, i + j, 0, 223 &(ptes[j].pteh), &(ptes[j].ptel)); 224 } 225 } 226 } 227 228 /* 229 * This computes the AVPN and B fields of the first dword of a HPTE, 230 * for use when we want to match an existing PTE. The bottom 7 bits 231 * of the returned value are zero. 232 */ 233 static inline unsigned long hpte_encode_avpn(unsigned long va, int psize, 234 int ssize) 235 { 236 unsigned long v; 237 238 v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); 239 v <<= HPTE_V_AVPN_SHIFT; 240 v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; 241 return v; 242 } 243 244 /* 245 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and 246 * the low 3 bits of flags happen to line up. So no transform is needed. 247 * We can probably optimize here and assume the high bits of newpp are 248 * already zero. For now I am paranoid. 249 */ 250 static long pSeries_lpar_hpte_updatepp(unsigned long slot, 251 unsigned long newpp, 252 unsigned long va, 253 int psize, int ssize, int local) 254 { 255 unsigned long lpar_rc; 256 unsigned long flags = (newpp & 7) | H_AVPN; 257 unsigned long want_v; 258 259 want_v = hpte_encode_avpn(va, psize, ssize); 260 261 pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", 262 want_v, slot, flags, psize); 263 264 lpar_rc = plpar_pte_protect(flags, slot, want_v); 265 266 if (lpar_rc == H_NOT_FOUND) { 267 pr_devel("not found !\n"); 268 return -1; 269 } 270 271 pr_devel("ok\n"); 272 273 BUG_ON(lpar_rc != H_SUCCESS); 274 275 return 0; 276 } 277 278 static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) 279 { 280 unsigned long dword0; 281 unsigned long lpar_rc; 282 unsigned long dummy_word1; 283 unsigned long flags; 284 285 /* Read 1 pte at a time */ 286 /* Do not need RPN to logical page translation */ 287 /* No cross CEC PFT access */ 288 flags = 0; 289 290 lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); 291 292 BUG_ON(lpar_rc != H_SUCCESS); 293 294 return dword0; 295 } 296 297 static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize) 298 { 299 unsigned long hash; 300 unsigned long i; 301 long slot; 302 unsigned long want_v, hpte_v; 303 304 hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize); 305 want_v = hpte_encode_avpn(va, psize, ssize); 306 307 /* Bolted entries are always in the primary group */ 308 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 309 for (i = 0; i < HPTES_PER_GROUP; i++) { 310 hpte_v = pSeries_lpar_hpte_getword0(slot); 311 312 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) 313 /* HPTE matches */ 314 return slot; 315 ++slot; 316 } 317 318 return -1; 319 } 320 321 static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, 322 unsigned long ea, 323 int psize, int ssize) 324 { 325 unsigned long lpar_rc, slot, vsid, va, flags; 326 327 vsid = get_kernel_vsid(ea, ssize); 328 va = hpt_va(ea, vsid, ssize); 329 330 slot = pSeries_lpar_hpte_find(va, psize, ssize); 331 BUG_ON(slot == -1); 332 333 flags = newpp & 7; 334 lpar_rc = plpar_pte_protect(flags, slot, 0); 335 336 BUG_ON(lpar_rc != H_SUCCESS); 337 } 338 339 static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, 340 int psize, int ssize, int local) 341 { 342 unsigned long want_v; 343 unsigned long lpar_rc; 344 unsigned long dummy1, dummy2; 345 346 pr_devel(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", 347 slot, va, psize, local); 348 349 want_v = hpte_encode_avpn(va, psize, ssize); 350 lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); 351 if (lpar_rc == H_NOT_FOUND) 352 return; 353 354 BUG_ON(lpar_rc != H_SUCCESS); 355 } 356 357 static void pSeries_lpar_hpte_removebolted(unsigned long ea, 358 int psize, int ssize) 359 { 360 unsigned long slot, vsid, va; 361 362 vsid = get_kernel_vsid(ea, ssize); 363 va = hpt_va(ea, vsid, ssize); 364 365 slot = pSeries_lpar_hpte_find(va, psize, ssize); 366 BUG_ON(slot == -1); 367 368 pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0); 369 } 370 371 /* Flag bits for H_BULK_REMOVE */ 372 #define HBR_REQUEST 0x4000000000000000UL 373 #define HBR_RESPONSE 0x8000000000000000UL 374 #define HBR_END 0xc000000000000000UL 375 #define HBR_AVPN 0x0200000000000000UL 376 #define HBR_ANDCOND 0x0100000000000000UL 377 378 /* 379 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 380 * lock. 381 */ 382 static void pSeries_lpar_flush_hash_range(unsigned long number, int local) 383 { 384 unsigned long i, pix, rc; 385 unsigned long flags = 0; 386 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 387 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 388 unsigned long param[9]; 389 unsigned long va; 390 unsigned long hash, index, shift, hidx, slot; 391 real_pte_t pte; 392 int psize, ssize; 393 394 if (lock_tlbie) 395 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 396 397 psize = batch->psize; 398 ssize = batch->ssize; 399 pix = 0; 400 for (i = 0; i < number; i++) { 401 va = batch->vaddr[i]; 402 pte = batch->pte[i]; 403 pte_iterate_hashed_subpages(pte, psize, va, index, shift) { 404 hash = hpt_hash(va, shift, ssize); 405 hidx = __rpte_to_hidx(pte, index); 406 if (hidx & _PTEIDX_SECONDARY) 407 hash = ~hash; 408 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 409 slot += hidx & _PTEIDX_GROUP_IX; 410 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 411 pSeries_lpar_hpte_invalidate(slot, va, psize, 412 ssize, local); 413 } else { 414 param[pix] = HBR_REQUEST | HBR_AVPN | slot; 415 param[pix+1] = hpte_encode_avpn(va, psize, 416 ssize); 417 pix += 2; 418 if (pix == 8) { 419 rc = plpar_hcall9(H_BULK_REMOVE, param, 420 param[0], param[1], param[2], 421 param[3], param[4], param[5], 422 param[6], param[7]); 423 BUG_ON(rc != H_SUCCESS); 424 pix = 0; 425 } 426 } 427 } pte_iterate_hashed_end(); 428 } 429 if (pix) { 430 param[pix] = HBR_END; 431 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], 432 param[2], param[3], param[4], param[5], 433 param[6], param[7]); 434 BUG_ON(rc != H_SUCCESS); 435 } 436 437 if (lock_tlbie) 438 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 439 } 440 441 static int __init disable_bulk_remove(char *str) 442 { 443 if (strcmp(str, "off") == 0 && 444 firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 445 printk(KERN_INFO "Disabling BULK_REMOVE firmware feature"); 446 powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; 447 } 448 return 1; 449 } 450 451 __setup("bulk_remove=", disable_bulk_remove); 452 453 void __init hpte_init_lpar(void) 454 { 455 ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; 456 ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; 457 ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; 458 ppc_md.hpte_insert = pSeries_lpar_hpte_insert; 459 ppc_md.hpte_remove = pSeries_lpar_hpte_remove; 460 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; 461 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; 462 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; 463 } 464 465 #ifdef CONFIG_PPC_SMLPAR 466 #define CMO_FREE_HINT_DEFAULT 1 467 static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; 468 469 static int __init cmo_free_hint(char *str) 470 { 471 char *parm; 472 parm = strstrip(str); 473 474 if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { 475 printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n"); 476 cmo_free_hint_flag = 0; 477 return 1; 478 } 479 480 cmo_free_hint_flag = 1; 481 printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n"); 482 483 if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) 484 return 1; 485 486 return 0; 487 } 488 489 __setup("cmo_free_hint=", cmo_free_hint); 490 491 static void pSeries_set_page_state(struct page *page, int order, 492 unsigned long state) 493 { 494 int i, j; 495 unsigned long cmo_page_sz, addr; 496 497 cmo_page_sz = cmo_get_page_size(); 498 addr = __pa((unsigned long)page_address(page)); 499 500 for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) { 501 for (j = 0; j < PAGE_SIZE; j += cmo_page_sz) 502 plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0); 503 } 504 } 505 506 void arch_free_page(struct page *page, int order) 507 { 508 if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO)) 509 return; 510 511 pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED); 512 } 513 EXPORT_SYMBOL(arch_free_page); 514 515 #endif 516 517 #ifdef CONFIG_TRACEPOINTS 518 /* 519 * We optimise our hcall path by placing hcall_tracepoint_refcount 520 * directly in the TOC so we can check if the hcall tracepoints are 521 * enabled via a single load. 522 */ 523 524 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 525 extern long hcall_tracepoint_refcount; 526 527 /* 528 * Since the tracing code might execute hcalls we need to guard against 529 * recursion. One example of this are spinlocks calling H_YIELD on 530 * shared processor partitions. 531 */ 532 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); 533 534 void hcall_tracepoint_regfunc(void) 535 { 536 hcall_tracepoint_refcount++; 537 } 538 539 void hcall_tracepoint_unregfunc(void) 540 { 541 hcall_tracepoint_refcount--; 542 } 543 544 void __trace_hcall_entry(unsigned long opcode, unsigned long *args) 545 { 546 unsigned long flags; 547 unsigned int *depth; 548 549 local_irq_save(flags); 550 551 depth = &__get_cpu_var(hcall_trace_depth); 552 553 if (*depth) 554 goto out; 555 556 (*depth)++; 557 trace_hcall_entry(opcode, args); 558 (*depth)--; 559 560 out: 561 local_irq_restore(flags); 562 } 563 564 void __trace_hcall_exit(long opcode, unsigned long retval, 565 unsigned long *retbuf) 566 { 567 unsigned long flags; 568 unsigned int *depth; 569 570 local_irq_save(flags); 571 572 depth = &__get_cpu_var(hcall_trace_depth); 573 574 if (*depth) 575 goto out; 576 577 (*depth)++; 578 trace_hcall_exit(opcode, retval, retbuf); 579 (*depth)--; 580 581 out: 582 local_irq_restore(flags); 583 } 584 #endif 585 586 /** 587 * h_get_mpp 588 * H_GET_MPP hcall returns info in 7 parms 589 */ 590 int h_get_mpp(struct hvcall_mpp_data *mpp_data) 591 { 592 int rc; 593 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; 594 595 rc = plpar_hcall9(H_GET_MPP, retbuf); 596 597 mpp_data->entitled_mem = retbuf[0]; 598 mpp_data->mapped_mem = retbuf[1]; 599 600 mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; 601 mpp_data->pool_num = retbuf[2] & 0xffff; 602 603 mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; 604 mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; 605 mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; 606 607 mpp_data->pool_size = retbuf[4]; 608 mpp_data->loan_request = retbuf[5]; 609 mpp_data->backing_mem = retbuf[6]; 610 611 return rc; 612 } 613 EXPORT_SYMBOL(h_get_mpp); 614 615 int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) 616 { 617 int rc; 618 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; 619 620 rc = plpar_hcall9(H_GET_MPP_X, retbuf); 621 622 mpp_x_data->coalesced_bytes = retbuf[0]; 623 mpp_x_data->pool_coalesced_bytes = retbuf[1]; 624 mpp_x_data->pool_purr_cycles = retbuf[2]; 625 mpp_x_data->pool_spurr_cycles = retbuf[3]; 626 627 return rc; 628 } 629