1 /* 2 * pSeries_lpar.c 3 * Copyright (C) 2001 Todd Inglett, IBM Corporation 4 * 5 * pSeries LPAR support. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22 /* Enables debugging of low-level hash table routines - careful! */ 23 #undef DEBUG 24 25 #include <linux/kernel.h> 26 #include <linux/dma-mapping.h> 27 #include <linux/console.h> 28 #include <linux/export.h> 29 #include <asm/processor.h> 30 #include <asm/mmu.h> 31 #include <asm/page.h> 32 #include <asm/pgtable.h> 33 #include <asm/machdep.h> 34 #include <asm/mmu_context.h> 35 #include <asm/iommu.h> 36 #include <asm/tlbflush.h> 37 #include <asm/tlb.h> 38 #include <asm/prom.h> 39 #include <asm/cputable.h> 40 #include <asm/udbg.h> 41 #include <asm/smp.h> 42 #include <asm/trace.h> 43 #include <asm/firmware.h> 44 45 #include "plpar_wrappers.h" 46 #include "pseries.h" 47 48 49 /* in hvCall.S */ 50 EXPORT_SYMBOL(plpar_hcall); 51 EXPORT_SYMBOL(plpar_hcall9); 52 EXPORT_SYMBOL(plpar_hcall_norets); 53 54 extern void pSeries_find_serial_port(void); 55 56 void vpa_init(int cpu) 57 { 58 int hwcpu = get_hard_smp_processor_id(cpu); 59 unsigned long addr; 60 long ret; 61 struct paca_struct *pp; 62 struct dtl_entry *dtl; 63 64 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 65 lppaca_of(cpu).vmxregs_in_use = 1; 66 67 addr = __pa(&lppaca_of(cpu)); 68 ret = register_vpa(hwcpu, addr); 69 70 if (ret) { 71 pr_err("WARNING: VPA registration for cpu %d (hw %d) of area " 72 "%lx failed with %ld\n", cpu, hwcpu, addr, ret); 73 return; 74 } 75 /* 76 * PAPR says this feature is SLB-Buffer but firmware never 77 * reports that. All SPLPAR support SLB shadow buffer. 78 */ 79 addr = __pa(&slb_shadow[cpu]); 80 if (firmware_has_feature(FW_FEATURE_SPLPAR)) { 81 ret = register_slb_shadow(hwcpu, addr); 82 if (ret) 83 pr_err("WARNING: SLB shadow buffer registration for " 84 "cpu %d (hw %d) of area %lx failed with %ld\n", 85 cpu, hwcpu, addr, ret); 86 } 87 88 /* 89 * Register dispatch trace log, if one has been allocated. 90 */ 91 pp = &paca[cpu]; 92 dtl = pp->dispatch_log; 93 if (dtl) { 94 pp->dtl_ridx = 0; 95 pp->dtl_curr = dtl; 96 lppaca_of(cpu).dtl_idx = 0; 97 98 /* hypervisor reads buffer length from this field */ 99 dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; 100 ret = register_dtl(hwcpu, __pa(dtl)); 101 if (ret) 102 pr_err("WARNING: DTL registration of cpu %d (hw %d) " 103 "failed with %ld\n", smp_processor_id(), 104 hwcpu, ret); 105 lppaca_of(cpu).dtl_enable_mask = 2; 106 } 107 } 108 109 static long pSeries_lpar_hpte_insert(unsigned long hpte_group, 110 unsigned long vpn, unsigned long pa, 111 unsigned long rflags, unsigned long vflags, 112 int psize, int ssize) 113 { 114 unsigned long lpar_rc; 115 unsigned long flags; 116 unsigned long slot; 117 unsigned long hpte_v, hpte_r; 118 119 if (!(vflags & HPTE_V_BOLTED)) 120 pr_devel("hpte_insert(group=%lx, vpn=%016lx, " 121 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n", 122 hpte_group, vpn, pa, rflags, vflags, psize); 123 124 hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID; 125 hpte_r = hpte_encode_r(pa, psize) | rflags; 126 127 if (!(vflags & HPTE_V_BOLTED)) 128 pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); 129 130 /* Now fill in the actual HPTE */ 131 /* Set CEC cookie to 0 */ 132 /* Zero page = 0 */ 133 /* I-cache Invalidate = 0 */ 134 /* I-cache synchronize = 0 */ 135 /* Exact = 0 */ 136 flags = 0; 137 138 /* Make pHyp happy */ 139 if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) 140 hpte_r &= ~_PAGE_COHERENT; 141 if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) 142 flags |= H_COALESCE_CAND; 143 144 lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); 145 if (unlikely(lpar_rc == H_PTEG_FULL)) { 146 if (!(vflags & HPTE_V_BOLTED)) 147 pr_devel(" full\n"); 148 return -1; 149 } 150 151 /* 152 * Since we try and ioremap PHBs we don't own, the pte insert 153 * will fail. However we must catch the failure in hash_page 154 * or we will loop forever, so return -2 in this case. 155 */ 156 if (unlikely(lpar_rc != H_SUCCESS)) { 157 if (!(vflags & HPTE_V_BOLTED)) 158 pr_devel(" lpar err %lu\n", lpar_rc); 159 return -2; 160 } 161 if (!(vflags & HPTE_V_BOLTED)) 162 pr_devel(" -> slot: %lu\n", slot & 7); 163 164 /* Because of iSeries, we have to pass down the secondary 165 * bucket bit here as well 166 */ 167 return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); 168 } 169 170 static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); 171 172 static long pSeries_lpar_hpte_remove(unsigned long hpte_group) 173 { 174 unsigned long slot_offset; 175 unsigned long lpar_rc; 176 int i; 177 unsigned long dummy1, dummy2; 178 179 /* pick a random slot to start at */ 180 slot_offset = mftb() & 0x7; 181 182 for (i = 0; i < HPTES_PER_GROUP; i++) { 183 184 /* don't remove a bolted entry */ 185 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, 186 (0x1UL << 4), &dummy1, &dummy2); 187 if (lpar_rc == H_SUCCESS) 188 return i; 189 BUG_ON(lpar_rc != H_NOT_FOUND); 190 191 slot_offset++; 192 slot_offset &= 0x7; 193 } 194 195 return -1; 196 } 197 198 static void pSeries_lpar_hptab_clear(void) 199 { 200 unsigned long size_bytes = 1UL << ppc64_pft_size; 201 unsigned long hpte_count = size_bytes >> 4; 202 struct { 203 unsigned long pteh; 204 unsigned long ptel; 205 } ptes[4]; 206 long lpar_rc; 207 unsigned long i, j; 208 209 /* Read in batches of 4, 210 * invalidate only valid entries not in the VRMA 211 * hpte_count will be a multiple of 4 212 */ 213 for (i = 0; i < hpte_count; i += 4) { 214 lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); 215 if (lpar_rc != H_SUCCESS) 216 continue; 217 for (j = 0; j < 4; j++){ 218 if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == 219 HPTE_V_VRMA_MASK) 220 continue; 221 if (ptes[j].pteh & HPTE_V_VALID) 222 plpar_pte_remove_raw(0, i + j, 0, 223 &(ptes[j].pteh), &(ptes[j].ptel)); 224 } 225 } 226 } 227 228 /* 229 * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and 230 * the low 3 bits of flags happen to line up. So no transform is needed. 231 * We can probably optimize here and assume the high bits of newpp are 232 * already zero. For now I am paranoid. 233 */ 234 static long pSeries_lpar_hpte_updatepp(unsigned long slot, 235 unsigned long newpp, 236 unsigned long vpn, 237 int psize, int ssize, int local) 238 { 239 unsigned long lpar_rc; 240 unsigned long flags = (newpp & 7) | H_AVPN; 241 unsigned long want_v; 242 243 want_v = hpte_encode_avpn(vpn, psize, ssize); 244 245 pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", 246 want_v, slot, flags, psize); 247 248 lpar_rc = plpar_pte_protect(flags, slot, want_v); 249 250 if (lpar_rc == H_NOT_FOUND) { 251 pr_devel("not found !\n"); 252 return -1; 253 } 254 255 pr_devel("ok\n"); 256 257 BUG_ON(lpar_rc != H_SUCCESS); 258 259 return 0; 260 } 261 262 static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) 263 { 264 unsigned long dword0; 265 unsigned long lpar_rc; 266 unsigned long dummy_word1; 267 unsigned long flags; 268 269 /* Read 1 pte at a time */ 270 /* Do not need RPN to logical page translation */ 271 /* No cross CEC PFT access */ 272 flags = 0; 273 274 lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1); 275 276 BUG_ON(lpar_rc != H_SUCCESS); 277 278 return dword0; 279 } 280 281 static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) 282 { 283 unsigned long hash; 284 unsigned long i; 285 long slot; 286 unsigned long want_v, hpte_v; 287 288 hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); 289 want_v = hpte_encode_avpn(vpn, psize, ssize); 290 291 /* Bolted entries are always in the primary group */ 292 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 293 for (i = 0; i < HPTES_PER_GROUP; i++) { 294 hpte_v = pSeries_lpar_hpte_getword0(slot); 295 296 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) 297 /* HPTE matches */ 298 return slot; 299 ++slot; 300 } 301 302 return -1; 303 } 304 305 static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, 306 unsigned long ea, 307 int psize, int ssize) 308 { 309 unsigned long vpn; 310 unsigned long lpar_rc, slot, vsid, flags; 311 312 vsid = get_kernel_vsid(ea, ssize); 313 vpn = hpt_vpn(ea, vsid, ssize); 314 315 slot = pSeries_lpar_hpte_find(vpn, psize, ssize); 316 BUG_ON(slot == -1); 317 318 flags = newpp & 7; 319 lpar_rc = plpar_pte_protect(flags, slot, 0); 320 321 BUG_ON(lpar_rc != H_SUCCESS); 322 } 323 324 static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, 325 int psize, int ssize, int local) 326 { 327 unsigned long want_v; 328 unsigned long lpar_rc; 329 unsigned long dummy1, dummy2; 330 331 pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", 332 slot, vpn, psize, local); 333 334 want_v = hpte_encode_avpn(vpn, psize, ssize); 335 lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); 336 if (lpar_rc == H_NOT_FOUND) 337 return; 338 339 BUG_ON(lpar_rc != H_SUCCESS); 340 } 341 342 static void pSeries_lpar_hpte_removebolted(unsigned long ea, 343 int psize, int ssize) 344 { 345 unsigned long vpn; 346 unsigned long slot, vsid; 347 348 vsid = get_kernel_vsid(ea, ssize); 349 vpn = hpt_vpn(ea, vsid, ssize); 350 351 slot = pSeries_lpar_hpte_find(vpn, psize, ssize); 352 BUG_ON(slot == -1); 353 354 pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); 355 } 356 357 /* Flag bits for H_BULK_REMOVE */ 358 #define HBR_REQUEST 0x4000000000000000UL 359 #define HBR_RESPONSE 0x8000000000000000UL 360 #define HBR_END 0xc000000000000000UL 361 #define HBR_AVPN 0x0200000000000000UL 362 #define HBR_ANDCOND 0x0100000000000000UL 363 364 /* 365 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 366 * lock. 367 */ 368 static void pSeries_lpar_flush_hash_range(unsigned long number, int local) 369 { 370 unsigned long vpn; 371 unsigned long i, pix, rc; 372 unsigned long flags = 0; 373 struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); 374 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); 375 unsigned long param[9]; 376 unsigned long hash, index, shift, hidx, slot; 377 real_pte_t pte; 378 int psize, ssize; 379 380 if (lock_tlbie) 381 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); 382 383 psize = batch->psize; 384 ssize = batch->ssize; 385 pix = 0; 386 for (i = 0; i < number; i++) { 387 vpn = batch->vpn[i]; 388 pte = batch->pte[i]; 389 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { 390 hash = hpt_hash(vpn, shift, ssize); 391 hidx = __rpte_to_hidx(pte, index); 392 if (hidx & _PTEIDX_SECONDARY) 393 hash = ~hash; 394 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 395 slot += hidx & _PTEIDX_GROUP_IX; 396 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 397 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 398 ssize, local); 399 } else { 400 param[pix] = HBR_REQUEST | HBR_AVPN | slot; 401 param[pix+1] = hpte_encode_avpn(vpn, psize, 402 ssize); 403 pix += 2; 404 if (pix == 8) { 405 rc = plpar_hcall9(H_BULK_REMOVE, param, 406 param[0], param[1], param[2], 407 param[3], param[4], param[5], 408 param[6], param[7]); 409 BUG_ON(rc != H_SUCCESS); 410 pix = 0; 411 } 412 } 413 } pte_iterate_hashed_end(); 414 } 415 if (pix) { 416 param[pix] = HBR_END; 417 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], 418 param[2], param[3], param[4], param[5], 419 param[6], param[7]); 420 BUG_ON(rc != H_SUCCESS); 421 } 422 423 if (lock_tlbie) 424 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 425 } 426 427 static int __init disable_bulk_remove(char *str) 428 { 429 if (strcmp(str, "off") == 0 && 430 firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 431 printk(KERN_INFO "Disabling BULK_REMOVE firmware feature"); 432 powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; 433 } 434 return 1; 435 } 436 437 __setup("bulk_remove=", disable_bulk_remove); 438 439 void __init hpte_init_lpar(void) 440 { 441 ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; 442 ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; 443 ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; 444 ppc_md.hpte_insert = pSeries_lpar_hpte_insert; 445 ppc_md.hpte_remove = pSeries_lpar_hpte_remove; 446 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; 447 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; 448 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; 449 } 450 451 #ifdef CONFIG_PPC_SMLPAR 452 #define CMO_FREE_HINT_DEFAULT 1 453 static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; 454 455 static int __init cmo_free_hint(char *str) 456 { 457 char *parm; 458 parm = strstrip(str); 459 460 if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { 461 printk(KERN_INFO "cmo_free_hint: CMO free page hinting is not active.\n"); 462 cmo_free_hint_flag = 0; 463 return 1; 464 } 465 466 cmo_free_hint_flag = 1; 467 printk(KERN_INFO "cmo_free_hint: CMO free page hinting is active.\n"); 468 469 if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) 470 return 1; 471 472 return 0; 473 } 474 475 __setup("cmo_free_hint=", cmo_free_hint); 476 477 static void pSeries_set_page_state(struct page *page, int order, 478 unsigned long state) 479 { 480 int i, j; 481 unsigned long cmo_page_sz, addr; 482 483 cmo_page_sz = cmo_get_page_size(); 484 addr = __pa((unsigned long)page_address(page)); 485 486 for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) { 487 for (j = 0; j < PAGE_SIZE; j += cmo_page_sz) 488 plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0); 489 } 490 } 491 492 void arch_free_page(struct page *page, int order) 493 { 494 if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO)) 495 return; 496 497 pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED); 498 } 499 EXPORT_SYMBOL(arch_free_page); 500 501 #endif 502 503 #ifdef CONFIG_TRACEPOINTS 504 /* 505 * We optimise our hcall path by placing hcall_tracepoint_refcount 506 * directly in the TOC so we can check if the hcall tracepoints are 507 * enabled via a single load. 508 */ 509 510 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ 511 extern long hcall_tracepoint_refcount; 512 513 /* 514 * Since the tracing code might execute hcalls we need to guard against 515 * recursion. One example of this are spinlocks calling H_YIELD on 516 * shared processor partitions. 517 */ 518 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); 519 520 void hcall_tracepoint_regfunc(void) 521 { 522 hcall_tracepoint_refcount++; 523 } 524 525 void hcall_tracepoint_unregfunc(void) 526 { 527 hcall_tracepoint_refcount--; 528 } 529 530 void __trace_hcall_entry(unsigned long opcode, unsigned long *args) 531 { 532 unsigned long flags; 533 unsigned int *depth; 534 535 /* 536 * We cannot call tracepoints inside RCU idle regions which 537 * means we must not trace H_CEDE. 538 */ 539 if (opcode == H_CEDE) 540 return; 541 542 local_irq_save(flags); 543 544 depth = &__get_cpu_var(hcall_trace_depth); 545 546 if (*depth) 547 goto out; 548 549 (*depth)++; 550 preempt_disable(); 551 trace_hcall_entry(opcode, args); 552 (*depth)--; 553 554 out: 555 local_irq_restore(flags); 556 } 557 558 void __trace_hcall_exit(long opcode, unsigned long retval, 559 unsigned long *retbuf) 560 { 561 unsigned long flags; 562 unsigned int *depth; 563 564 if (opcode == H_CEDE) 565 return; 566 567 local_irq_save(flags); 568 569 depth = &__get_cpu_var(hcall_trace_depth); 570 571 if (*depth) 572 goto out; 573 574 (*depth)++; 575 trace_hcall_exit(opcode, retval, retbuf); 576 preempt_enable(); 577 (*depth)--; 578 579 out: 580 local_irq_restore(flags); 581 } 582 #endif 583 584 /** 585 * h_get_mpp 586 * H_GET_MPP hcall returns info in 7 parms 587 */ 588 int h_get_mpp(struct hvcall_mpp_data *mpp_data) 589 { 590 int rc; 591 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; 592 593 rc = plpar_hcall9(H_GET_MPP, retbuf); 594 595 mpp_data->entitled_mem = retbuf[0]; 596 mpp_data->mapped_mem = retbuf[1]; 597 598 mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; 599 mpp_data->pool_num = retbuf[2] & 0xffff; 600 601 mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; 602 mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; 603 mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; 604 605 mpp_data->pool_size = retbuf[4]; 606 mpp_data->loan_request = retbuf[5]; 607 mpp_data->backing_mem = retbuf[6]; 608 609 return rc; 610 } 611 EXPORT_SYMBOL(h_get_mpp); 612 613 int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) 614 { 615 int rc; 616 unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; 617 618 rc = plpar_hcall9(H_GET_MPP_X, retbuf); 619 620 mpp_x_data->coalesced_bytes = retbuf[0]; 621 mpp_x_data->pool_coalesced_bytes = retbuf[1]; 622 mpp_x_data->pool_purr_cycles = retbuf[2]; 623 mpp_x_data->pool_spurr_cycles = retbuf[3]; 624 625 return rc; 626 } 627