1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * PowerNV cpuidle code 4 * 5 * Copyright 2015 IBM Corp. 6 */ 7 8 #include <linux/types.h> 9 #include <linux/mm.h> 10 #include <linux/slab.h> 11 #include <linux/sysfs.h> 12 #include <linux/of.h> 13 #include <linux/device.h> 14 #include <linux/cpu.h> 15 16 #include <asm/firmware.h> 17 #include <asm/interrupt.h> 18 #include <asm/machdep.h> 19 #include <asm/opal.h> 20 #include <asm/cputhreads.h> 21 #include <asm/cpuidle.h> 22 #include <asm/text-patching.h> 23 #include <asm/smp.h> 24 #include <asm/runlatch.h> 25 #include <asm/dbell.h> 26 27 #include "powernv.h" 28 #include "subcore.h" 29 30 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */ 31 #define MAX_STOP_STATE 0xF 32 33 #define P9_STOP_SPR_MSR 2000 34 #define P9_STOP_SPR_PSSCR 855 35 36 static u32 supported_cpuidle_states; 37 struct pnv_idle_states_t *pnv_idle_states; 38 int nr_pnv_idle_states; 39 40 /* 41 * The default stop state that will be used by ppc_md.power_save 42 * function on platforms that support stop instruction. 43 */ 44 static u64 pnv_default_stop_val; 45 static u64 pnv_default_stop_mask; 46 static bool default_stop_found; 47 48 /* 49 * First stop state levels when SPR and TB loss can occur. 50 */ 51 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 52 static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; 53 54 /* 55 * psscr value and mask of the deepest stop idle state. 56 * Used when a cpu is offlined. 57 */ 58 static u64 pnv_deepest_stop_psscr_val; 59 static u64 pnv_deepest_stop_psscr_mask; 60 static u64 pnv_deepest_stop_flag; 61 static bool deepest_stop_found; 62 63 static unsigned long power7_offline_type; 64 65 static int __init pnv_save_sprs_for_deep_states(void) 66 { 67 int cpu; 68 int rc; 69 70 /* 71 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across 72 * all cpus at boot. Get these reg values of current cpu and use the 73 * same across all cpus. 74 */ 75 uint64_t lpcr_val = mfspr(SPRN_LPCR); 76 uint64_t hid0_val = mfspr(SPRN_HID0); 77 uint64_t hmeer_val = mfspr(SPRN_HMEER); 78 uint64_t msr_val = MSR_IDLE; 79 uint64_t psscr_val = pnv_deepest_stop_psscr_val; 80 81 for_each_present_cpu(cpu) { 82 uint64_t pir = get_hard_smp_processor_id(cpu); 83 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; 84 85 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 86 if (rc != 0) 87 return rc; 88 89 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 90 if (rc != 0) 91 return rc; 92 93 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 94 rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); 95 if (rc) 96 return rc; 97 98 rc = opal_slw_set_reg(pir, 99 P9_STOP_SPR_PSSCR, psscr_val); 100 101 if (rc) 102 return rc; 103 } 104 105 /* HIDs are per core registers */ 106 if (cpu_thread_in_core(cpu) == 0) { 107 108 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 109 if (rc != 0) 110 return rc; 111 112 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 113 if (rc != 0) 114 return rc; 115 116 /* Only p8 needs to set extra HID registers */ 117 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 118 uint64_t hid1_val = mfspr(SPRN_HID1); 119 uint64_t hid4_val = mfspr(SPRN_HID4); 120 uint64_t hid5_val = mfspr(SPRN_HID5); 121 122 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 123 if (rc != 0) 124 return rc; 125 126 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 127 if (rc != 0) 128 return rc; 129 130 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 131 if (rc != 0) 132 return rc; 133 } 134 } 135 } 136 137 return 0; 138 } 139 140 u32 pnv_get_supported_cpuidle_states(void) 141 { 142 return supported_cpuidle_states; 143 } 144 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 145 146 static void pnv_fastsleep_workaround_apply(void *info) 147 148 { 149 int cpu = smp_processor_id(); 150 int rc; 151 int *err = info; 152 153 if (cpu_first_thread_sibling(cpu) != cpu) 154 return; 155 156 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 157 OPAL_CONFIG_IDLE_APPLY); 158 if (rc) 159 *err = 1; 160 } 161 162 static bool power7_fastsleep_workaround_entry = true; 163 static bool power7_fastsleep_workaround_exit = true; 164 165 /* 166 * Used to store fastsleep workaround state 167 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 168 * 1 - Workaround applied once, never undone. 169 */ 170 static u8 fastsleep_workaround_applyonce; 171 172 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, 173 struct device_attribute *attr, char *buf) 174 { 175 return sysfs_emit(buf, "%u\n", fastsleep_workaround_applyonce); 176 } 177 178 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, 179 struct device_attribute *attr, const char *buf, 180 size_t count) 181 { 182 int err; 183 u8 val; 184 185 if (kstrtou8(buf, 0, &val) || val != 1) 186 return -EINVAL; 187 188 if (fastsleep_workaround_applyonce == 1) 189 return count; 190 191 /* 192 * fastsleep_workaround_applyonce = 1 implies 193 * fastsleep workaround needs to be left in 'applied' state on all 194 * the cores. Do this by- 195 * 1. Disable the 'undo' workaround in fastsleep exit path 196 * 2. Sendi IPIs to all the cores which have at least one online thread 197 * 3. Disable the 'apply' workaround in fastsleep entry path 198 * 199 * There is no need to send ipi to cores which have all threads 200 * offlined, as last thread of the core entering fastsleep or deeper 201 * state would have applied workaround. 202 */ 203 power7_fastsleep_workaround_exit = false; 204 205 cpus_read_lock(); 206 on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); 207 cpus_read_unlock(); 208 if (err) { 209 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); 210 goto fail; 211 } 212 213 power7_fastsleep_workaround_entry = false; 214 215 fastsleep_workaround_applyonce = 1; 216 217 return count; 218 fail: 219 return -EIO; 220 } 221 222 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, 223 show_fastsleep_workaround_applyonce, 224 store_fastsleep_workaround_applyonce); 225 226 static inline void atomic_start_thread_idle(void) 227 { 228 int cpu = raw_smp_processor_id(); 229 int first = cpu_first_thread_sibling(cpu); 230 int thread_nr = cpu_thread_in_core(cpu); 231 unsigned long *state = &paca_ptrs[first]->idle_state; 232 233 clear_bit(thread_nr, state); 234 } 235 236 static inline void atomic_stop_thread_idle(void) 237 { 238 int cpu = raw_smp_processor_id(); 239 int first = cpu_first_thread_sibling(cpu); 240 int thread_nr = cpu_thread_in_core(cpu); 241 unsigned long *state = &paca_ptrs[first]->idle_state; 242 243 set_bit(thread_nr, state); 244 } 245 246 static inline void atomic_lock_thread_idle(void) 247 { 248 int cpu = raw_smp_processor_id(); 249 int first = cpu_first_thread_sibling(cpu); 250 unsigned long *lock = &paca_ptrs[first]->idle_lock; 251 252 while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) 253 barrier(); 254 } 255 256 static inline void atomic_unlock_and_stop_thread_idle(void) 257 { 258 int cpu = raw_smp_processor_id(); 259 int first = cpu_first_thread_sibling(cpu); 260 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 261 unsigned long *state = &paca_ptrs[first]->idle_state; 262 unsigned long *lock = &paca_ptrs[first]->idle_lock; 263 u64 s = READ_ONCE(*state); 264 u64 new, tmp; 265 266 BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); 267 BUG_ON(s & thread); 268 269 again: 270 new = s | thread; 271 tmp = cmpxchg(state, s, new); 272 if (unlikely(tmp != s)) { 273 s = tmp; 274 goto again; 275 } 276 clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); 277 } 278 279 static inline void atomic_unlock_thread_idle(void) 280 { 281 int cpu = raw_smp_processor_id(); 282 int first = cpu_first_thread_sibling(cpu); 283 unsigned long *lock = &paca_ptrs[first]->idle_lock; 284 285 BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); 286 clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); 287 } 288 289 /* P7 and P8 */ 290 struct p7_sprs { 291 /* per core */ 292 u64 tscr; 293 u64 worc; 294 295 /* per subcore */ 296 u64 sdr1; 297 u64 rpr; 298 299 /* per thread */ 300 u64 lpcr; 301 u64 hfscr; 302 u64 fscr; 303 u64 purr; 304 u64 spurr; 305 u64 dscr; 306 u64 wort; 307 308 /* per thread SPRs that get lost in shallow states */ 309 u64 amr; 310 u64 iamr; 311 u64 uamor; 312 /* amor is restored to constant ~0 */ 313 }; 314 315 static unsigned long power7_idle_insn(unsigned long type) 316 { 317 int cpu = raw_smp_processor_id(); 318 int first = cpu_first_thread_sibling(cpu); 319 unsigned long *state = &paca_ptrs[first]->idle_state; 320 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 321 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 322 unsigned long srr1; 323 bool full_winkle; 324 struct p7_sprs sprs = {}; /* avoid false use-uninitialised */ 325 bool sprs_saved = false; 326 int rc; 327 328 if (unlikely(type != PNV_THREAD_NAP)) { 329 atomic_lock_thread_idle(); 330 331 BUG_ON(!(*state & thread)); 332 *state &= ~thread; 333 334 if (power7_fastsleep_workaround_entry) { 335 if ((*state & core_thread_mask) == 0) { 336 rc = opal_config_cpu_idle_state( 337 OPAL_CONFIG_IDLE_FASTSLEEP, 338 OPAL_CONFIG_IDLE_APPLY); 339 BUG_ON(rc); 340 } 341 } 342 343 if (type == PNV_THREAD_WINKLE) { 344 sprs.tscr = mfspr(SPRN_TSCR); 345 sprs.worc = mfspr(SPRN_WORC); 346 347 sprs.sdr1 = mfspr(SPRN_SDR1); 348 sprs.rpr = mfspr(SPRN_RPR); 349 350 sprs.lpcr = mfspr(SPRN_LPCR); 351 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 352 sprs.hfscr = mfspr(SPRN_HFSCR); 353 sprs.fscr = mfspr(SPRN_FSCR); 354 } 355 sprs.purr = mfspr(SPRN_PURR); 356 sprs.spurr = mfspr(SPRN_SPURR); 357 sprs.dscr = mfspr(SPRN_DSCR); 358 sprs.wort = mfspr(SPRN_WORT); 359 360 sprs_saved = true; 361 362 /* 363 * Increment winkle counter and set all winkle bits if 364 * all threads are winkling. This allows wakeup side to 365 * distinguish between fast sleep and winkle state 366 * loss. Fast sleep still has to resync the timebase so 367 * this may not be a really big win. 368 */ 369 *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 370 if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) 371 >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 372 == threads_per_core) 373 *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS; 374 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 375 } 376 377 atomic_unlock_thread_idle(); 378 } 379 380 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 381 sprs.amr = mfspr(SPRN_AMR); 382 sprs.iamr = mfspr(SPRN_IAMR); 383 sprs.uamor = mfspr(SPRN_UAMOR); 384 } 385 386 local_paca->thread_idle_state = type; 387 srr1 = isa206_idle_insn_mayloss(type); /* go idle */ 388 local_paca->thread_idle_state = PNV_THREAD_RUNNING; 389 390 WARN_ON_ONCE(!srr1); 391 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 392 393 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 394 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { 395 /* 396 * We don't need an isync after the mtsprs here because 397 * the upcoming mtmsrd is execution synchronizing. 398 */ 399 mtspr(SPRN_AMR, sprs.amr); 400 mtspr(SPRN_IAMR, sprs.iamr); 401 mtspr(SPRN_AMOR, ~0); 402 mtspr(SPRN_UAMOR, sprs.uamor); 403 } 404 } 405 406 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 407 hmi_exception_realmode(NULL); 408 409 if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) { 410 if (unlikely(type != PNV_THREAD_NAP)) { 411 atomic_lock_thread_idle(); 412 if (type == PNV_THREAD_WINKLE) { 413 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 414 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 415 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); 416 } 417 atomic_unlock_and_stop_thread_idle(); 418 } 419 return srr1; 420 } 421 422 /* HV state loss */ 423 BUG_ON(type == PNV_THREAD_NAP); 424 425 atomic_lock_thread_idle(); 426 427 full_winkle = false; 428 if (type == PNV_THREAD_WINKLE) { 429 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 430 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 431 if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) { 432 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); 433 full_winkle = true; 434 BUG_ON(!sprs_saved); 435 } 436 } 437 438 WARN_ON(*state & thread); 439 440 if ((*state & core_thread_mask) != 0) 441 goto core_woken; 442 443 /* Per-core SPRs */ 444 if (full_winkle) { 445 mtspr(SPRN_TSCR, sprs.tscr); 446 mtspr(SPRN_WORC, sprs.worc); 447 } 448 449 if (power7_fastsleep_workaround_exit) { 450 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 451 OPAL_CONFIG_IDLE_UNDO); 452 BUG_ON(rc); 453 } 454 455 /* TB */ 456 if (opal_resync_timebase() != OPAL_SUCCESS) 457 BUG(); 458 459 core_woken: 460 if (!full_winkle) 461 goto subcore_woken; 462 463 if ((*state & local_paca->subcore_sibling_mask) != 0) 464 goto subcore_woken; 465 466 /* Per-subcore SPRs */ 467 mtspr(SPRN_SDR1, sprs.sdr1); 468 mtspr(SPRN_RPR, sprs.rpr); 469 470 subcore_woken: 471 /* 472 * isync after restoring shared SPRs and before unlocking. Unlock 473 * only contains hwsync which does not necessarily do the right 474 * thing for SPRs. 475 */ 476 isync(); 477 atomic_unlock_and_stop_thread_idle(); 478 479 /* Fast sleep does not lose SPRs */ 480 if (!full_winkle) 481 return srr1; 482 483 /* Per-thread SPRs */ 484 mtspr(SPRN_LPCR, sprs.lpcr); 485 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 486 mtspr(SPRN_HFSCR, sprs.hfscr); 487 mtspr(SPRN_FSCR, sprs.fscr); 488 } 489 mtspr(SPRN_PURR, sprs.purr); 490 mtspr(SPRN_SPURR, sprs.spurr); 491 mtspr(SPRN_DSCR, sprs.dscr); 492 mtspr(SPRN_WORT, sprs.wort); 493 494 mtspr(SPRN_SPRG3, local_paca->sprg_vdso); 495 496 #ifdef CONFIG_PPC_64S_HASH_MMU 497 /* 498 * The SLB has to be restored here, but it sometimes still 499 * contains entries, so the __ variant must be used to prevent 500 * multi hits. 501 */ 502 __slb_restore_bolted_realmode(); 503 #endif 504 505 return srr1; 506 } 507 508 extern unsigned long idle_kvm_start_guest(unsigned long srr1); 509 510 #ifdef CONFIG_HOTPLUG_CPU 511 static unsigned long power7_offline(void) 512 { 513 unsigned long srr1; 514 515 mtmsr(MSR_IDLE); 516 517 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 518 /* Tell KVM we're entering idle. */ 519 /******************************************************/ 520 /* N O T E W E L L ! ! ! N O T E W E L L */ 521 /* The following store to HSTATE_HWTHREAD_STATE(r13) */ 522 /* MUST occur in real mode, i.e. with the MMU off, */ 523 /* and the MMU must stay off until we clear this flag */ 524 /* and test HSTATE_HWTHREAD_REQ(r13) in */ 525 /* pnv_powersave_wakeup in this file. */ 526 /* The reason is that another thread can switch the */ 527 /* MMU to a guest context whenever this flag is set */ 528 /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ 529 /* that would potentially cause this thread to start */ 530 /* executing instructions from guest memory in */ 531 /* hypervisor mode, leading to a host crash or data */ 532 /* corruption, or worse. */ 533 /******************************************************/ 534 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; 535 #endif 536 537 __ppc64_runlatch_off(); 538 srr1 = power7_idle_insn(power7_offline_type); 539 __ppc64_runlatch_on(); 540 541 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 542 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; 543 /* Order setting hwthread_state vs. testing hwthread_req */ 544 smp_mb(); 545 if (local_paca->kvm_hstate.hwthread_req) 546 srr1 = idle_kvm_start_guest(srr1); 547 #endif 548 549 mtmsr(MSR_KERNEL); 550 551 return srr1; 552 } 553 #endif 554 555 void power7_idle_type(unsigned long type) 556 { 557 unsigned long srr1; 558 559 if (!prep_irq_for_idle_irqsoff()) 560 return; 561 562 mtmsr(MSR_IDLE); 563 __ppc64_runlatch_off(); 564 srr1 = power7_idle_insn(type); 565 __ppc64_runlatch_on(); 566 mtmsr(MSR_KERNEL); 567 568 fini_irq_for_idle_irqsoff(); 569 irq_set_pending_from_srr1(srr1); 570 } 571 572 static void power7_idle(void) 573 { 574 if (!powersave_nap) 575 return; 576 577 power7_idle_type(PNV_THREAD_NAP); 578 } 579 580 struct p9_sprs { 581 /* per core */ 582 u64 ptcr; 583 u64 rpr; 584 u64 tscr; 585 u64 ldbar; 586 587 /* per thread */ 588 u64 lpcr; 589 u64 hfscr; 590 u64 fscr; 591 u64 pid; 592 u64 purr; 593 u64 spurr; 594 u64 dscr; 595 u64 ciabr; 596 597 u64 mmcra; 598 u32 mmcr0; 599 u32 mmcr1; 600 u64 mmcr2; 601 602 /* per thread SPRs that get lost in shallow states */ 603 u64 amr; 604 u64 iamr; 605 u64 amor; 606 u64 uamor; 607 }; 608 609 static unsigned long power9_idle_stop(unsigned long psscr) 610 { 611 int cpu = raw_smp_processor_id(); 612 int first = cpu_first_thread_sibling(cpu); 613 unsigned long *state = &paca_ptrs[first]->idle_state; 614 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 615 unsigned long srr1; 616 unsigned long pls; 617 unsigned long mmcr0 = 0; 618 unsigned long mmcra = 0; 619 struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ 620 bool sprs_saved = false; 621 622 if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { 623 /* EC=ESL=0 case */ 624 625 /* 626 * Wake synchronously. SRESET via xscom may still cause 627 * a 0x100 powersave wakeup with SRR1 reason! 628 */ 629 srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ 630 if (likely(!srr1)) 631 return 0; 632 633 /* 634 * Registers not saved, can't recover! 635 * This would be a hardware bug 636 */ 637 BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); 638 639 goto out; 640 } 641 642 /* EC=ESL=1 case */ 643 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 644 if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) { 645 local_paca->requested_psscr = psscr; 646 /* order setting requested_psscr vs testing dont_stop */ 647 smp_mb(); 648 if (atomic_read(&local_paca->dont_stop)) { 649 local_paca->requested_psscr = 0; 650 return 0; 651 } 652 } 653 #endif 654 655 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { 656 /* 657 * POWER9 DD2 can incorrectly set PMAO when waking up 658 * after a state-loss idle. Saving and restoring MMCR0 659 * over idle is a workaround. 660 */ 661 mmcr0 = mfspr(SPRN_MMCR0); 662 } 663 664 if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { 665 sprs.lpcr = mfspr(SPRN_LPCR); 666 sprs.hfscr = mfspr(SPRN_HFSCR); 667 sprs.fscr = mfspr(SPRN_FSCR); 668 sprs.pid = mfspr(SPRN_PID); 669 sprs.purr = mfspr(SPRN_PURR); 670 sprs.spurr = mfspr(SPRN_SPURR); 671 sprs.dscr = mfspr(SPRN_DSCR); 672 sprs.ciabr = mfspr(SPRN_CIABR); 673 674 sprs.mmcra = mfspr(SPRN_MMCRA); 675 sprs.mmcr0 = mfspr(SPRN_MMCR0); 676 sprs.mmcr1 = mfspr(SPRN_MMCR1); 677 sprs.mmcr2 = mfspr(SPRN_MMCR2); 678 679 sprs.ptcr = mfspr(SPRN_PTCR); 680 sprs.rpr = mfspr(SPRN_RPR); 681 sprs.tscr = mfspr(SPRN_TSCR); 682 if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) 683 sprs.ldbar = mfspr(SPRN_LDBAR); 684 685 sprs_saved = true; 686 687 atomic_start_thread_idle(); 688 } 689 690 sprs.amr = mfspr(SPRN_AMR); 691 sprs.iamr = mfspr(SPRN_IAMR); 692 sprs.uamor = mfspr(SPRN_UAMOR); 693 694 srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ 695 696 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 697 local_paca->requested_psscr = 0; 698 #endif 699 700 psscr = mfspr(SPRN_PSSCR); 701 702 WARN_ON_ONCE(!srr1); 703 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 704 705 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { 706 /* 707 * We don't need an isync after the mtsprs here because the 708 * upcoming mtmsrd is execution synchronizing. 709 */ 710 mtspr(SPRN_AMR, sprs.amr); 711 mtspr(SPRN_IAMR, sprs.iamr); 712 mtspr(SPRN_AMOR, ~0); 713 mtspr(SPRN_UAMOR, sprs.uamor); 714 715 /* 716 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT 717 * might have been corrupted and needs flushing. We also need 718 * to reload MMCR0 (see mmcr0 comment above). 719 */ 720 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { 721 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT); 722 mtspr(SPRN_MMCR0, mmcr0); 723 } 724 725 /* 726 * DD2.2 and earlier need to set then clear bit 60 in MMCRA 727 * to ensure the PMU starts running. 728 */ 729 mmcra = mfspr(SPRN_MMCRA); 730 mmcra |= PPC_BIT(60); 731 mtspr(SPRN_MMCRA, mmcra); 732 mmcra &= ~PPC_BIT(60); 733 mtspr(SPRN_MMCRA, mmcra); 734 } 735 736 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 737 hmi_exception_realmode(NULL); 738 739 /* 740 * On POWER9, SRR1 bits do not match exactly as expected. 741 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so 742 * just always test PSSCR for SPR/TB state loss. 743 */ 744 pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; 745 if (likely(pls < deep_spr_loss_state)) { 746 if (sprs_saved) 747 atomic_stop_thread_idle(); 748 goto out; 749 } 750 751 /* HV state loss */ 752 BUG_ON(!sprs_saved); 753 754 atomic_lock_thread_idle(); 755 756 if ((*state & core_thread_mask) != 0) 757 goto core_woken; 758 759 /* Per-core SPRs */ 760 mtspr(SPRN_PTCR, sprs.ptcr); 761 mtspr(SPRN_RPR, sprs.rpr); 762 mtspr(SPRN_TSCR, sprs.tscr); 763 764 if (pls >= pnv_first_tb_loss_level) { 765 /* TB loss */ 766 if (opal_resync_timebase() != OPAL_SUCCESS) 767 BUG(); 768 } 769 770 /* 771 * isync after restoring shared SPRs and before unlocking. Unlock 772 * only contains hwsync which does not necessarily do the right 773 * thing for SPRs. 774 */ 775 isync(); 776 777 core_woken: 778 atomic_unlock_and_stop_thread_idle(); 779 780 /* Per-thread SPRs */ 781 mtspr(SPRN_LPCR, sprs.lpcr); 782 mtspr(SPRN_HFSCR, sprs.hfscr); 783 mtspr(SPRN_FSCR, sprs.fscr); 784 mtspr(SPRN_PID, sprs.pid); 785 mtspr(SPRN_PURR, sprs.purr); 786 mtspr(SPRN_SPURR, sprs.spurr); 787 mtspr(SPRN_DSCR, sprs.dscr); 788 mtspr(SPRN_CIABR, sprs.ciabr); 789 790 mtspr(SPRN_MMCRA, sprs.mmcra); 791 mtspr(SPRN_MMCR0, sprs.mmcr0); 792 mtspr(SPRN_MMCR1, sprs.mmcr1); 793 mtspr(SPRN_MMCR2, sprs.mmcr2); 794 if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) 795 mtspr(SPRN_LDBAR, sprs.ldbar); 796 797 mtspr(SPRN_SPRG3, local_paca->sprg_vdso); 798 799 if (!radix_enabled()) 800 __slb_restore_bolted_realmode(); 801 802 out: 803 mtmsr(MSR_KERNEL); 804 805 return srr1; 806 } 807 808 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 809 /* 810 * This is used in working around bugs in thread reconfiguration 811 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional 812 * memory and the way that XER[SO] is checkpointed. 813 * This function forces the core into SMT4 in order by asking 814 * all other threads not to stop, and sending a message to any 815 * that are in a stop state. 816 * Must be called with preemption disabled. 817 */ 818 void pnv_power9_force_smt4_catch(void) 819 { 820 int cpu, cpu0, thr; 821 int awake_threads = 1; /* this thread is awake */ 822 int poke_threads = 0; 823 int need_awake = threads_per_core; 824 825 cpu = smp_processor_id(); 826 cpu0 = cpu & ~(threads_per_core - 1); 827 for (thr = 0; thr < threads_per_core; ++thr) { 828 if (cpu != cpu0 + thr) 829 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); 830 } 831 /* order setting dont_stop vs testing requested_psscr */ 832 smp_mb(); 833 for (thr = 0; thr < threads_per_core; ++thr) { 834 if (!paca_ptrs[cpu0+thr]->requested_psscr) 835 ++awake_threads; 836 else 837 poke_threads |= (1 << thr); 838 } 839 840 /* If at least 3 threads are awake, the core is in SMT4 already */ 841 if (awake_threads < need_awake) { 842 /* We have to wake some threads; we'll use msgsnd */ 843 for (thr = 0; thr < threads_per_core; ++thr) { 844 if (poke_threads & (1 << thr)) { 845 ppc_msgsnd_sync(); 846 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, 847 paca_ptrs[cpu0+thr]->hw_cpu_id); 848 } 849 } 850 /* now spin until at least 3 threads are awake */ 851 do { 852 for (thr = 0; thr < threads_per_core; ++thr) { 853 if ((poke_threads & (1 << thr)) && 854 !paca_ptrs[cpu0+thr]->requested_psscr) { 855 ++awake_threads; 856 poke_threads &= ~(1 << thr); 857 } 858 } 859 } while (awake_threads < need_awake); 860 } 861 } 862 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); 863 864 void pnv_power9_force_smt4_release(void) 865 { 866 int cpu, cpu0, thr; 867 868 cpu = smp_processor_id(); 869 cpu0 = cpu & ~(threads_per_core - 1); 870 871 /* clear all the dont_stop flags */ 872 for (thr = 0; thr < threads_per_core; ++thr) { 873 if (cpu != cpu0 + thr) 874 atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); 875 } 876 } 877 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); 878 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 879 880 struct p10_sprs { 881 /* 882 * SPRs that get lost in shallow states: 883 * 884 * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1 885 * isa300 idle routines restore CR, LR. 886 * CTR is volatile 887 * idle thread doesn't use FP or VEC 888 * kernel doesn't use TAR 889 * HSPRG1 is only live in HV interrupt entry 890 * SPRG2 is only live in KVM guests, KVM handles it. 891 */ 892 }; 893 894 static unsigned long power10_idle_stop(unsigned long psscr) 895 { 896 int cpu = raw_smp_processor_id(); 897 int first = cpu_first_thread_sibling(cpu); 898 unsigned long *state = &paca_ptrs[first]->idle_state; 899 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 900 unsigned long srr1; 901 unsigned long pls; 902 // struct p10_sprs sprs = {}; /* avoid false used-uninitialised */ 903 bool sprs_saved = false; 904 905 if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { 906 /* EC=ESL=0 case */ 907 908 /* 909 * Wake synchronously. SRESET via xscom may still cause 910 * a 0x100 powersave wakeup with SRR1 reason! 911 */ 912 srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ 913 if (likely(!srr1)) 914 return 0; 915 916 /* 917 * Registers not saved, can't recover! 918 * This would be a hardware bug 919 */ 920 BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); 921 922 goto out; 923 } 924 925 /* EC=ESL=1 case */ 926 if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { 927 /* XXX: save SPRs for deep state loss here. */ 928 929 sprs_saved = true; 930 931 atomic_start_thread_idle(); 932 } 933 934 srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ 935 936 psscr = mfspr(SPRN_PSSCR); 937 938 WARN_ON_ONCE(!srr1); 939 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 940 941 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 942 hmi_exception_realmode(NULL); 943 944 /* 945 * On POWER10, SRR1 bits do not match exactly as expected. 946 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so 947 * just always test PSSCR for SPR/TB state loss. 948 */ 949 pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; 950 if (likely(pls < deep_spr_loss_state)) { 951 if (sprs_saved) 952 atomic_stop_thread_idle(); 953 goto out; 954 } 955 956 /* HV state loss */ 957 BUG_ON(!sprs_saved); 958 959 atomic_lock_thread_idle(); 960 961 if ((*state & core_thread_mask) != 0) 962 goto core_woken; 963 964 /* XXX: restore per-core SPRs here */ 965 966 if (pls >= pnv_first_tb_loss_level) { 967 /* TB loss */ 968 if (opal_resync_timebase() != OPAL_SUCCESS) 969 BUG(); 970 } 971 972 /* 973 * isync after restoring shared SPRs and before unlocking. Unlock 974 * only contains hwsync which does not necessarily do the right 975 * thing for SPRs. 976 */ 977 isync(); 978 979 core_woken: 980 atomic_unlock_and_stop_thread_idle(); 981 982 /* XXX: restore per-thread SPRs here */ 983 984 if (!radix_enabled()) 985 __slb_restore_bolted_realmode(); 986 987 out: 988 mtmsr(MSR_KERNEL); 989 990 return srr1; 991 } 992 993 #ifdef CONFIG_HOTPLUG_CPU 994 static unsigned long arch300_offline_stop(unsigned long psscr) 995 { 996 unsigned long srr1; 997 998 if (cpu_has_feature(CPU_FTR_ARCH_31)) 999 srr1 = power10_idle_stop(psscr); 1000 else 1001 srr1 = power9_idle_stop(psscr); 1002 1003 return srr1; 1004 } 1005 #endif 1006 1007 void arch300_idle_type(unsigned long stop_psscr_val, 1008 unsigned long stop_psscr_mask) 1009 { 1010 unsigned long psscr; 1011 unsigned long srr1; 1012 1013 if (!prep_irq_for_idle_irqsoff()) 1014 return; 1015 1016 psscr = mfspr(SPRN_PSSCR); 1017 psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; 1018 1019 __ppc64_runlatch_off(); 1020 if (cpu_has_feature(CPU_FTR_ARCH_31)) 1021 srr1 = power10_idle_stop(psscr); 1022 else 1023 srr1 = power9_idle_stop(psscr); 1024 __ppc64_runlatch_on(); 1025 1026 fini_irq_for_idle_irqsoff(); 1027 1028 irq_set_pending_from_srr1(srr1); 1029 } 1030 1031 /* 1032 * Used for ppc_md.power_save which needs a function with no parameters 1033 */ 1034 static void arch300_idle(void) 1035 { 1036 arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 1037 } 1038 1039 #ifdef CONFIG_HOTPLUG_CPU 1040 1041 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) 1042 { 1043 u64 pir = get_hard_smp_processor_id(cpu); 1044 1045 mtspr(SPRN_LPCR, lpcr_val); 1046 1047 /* 1048 * Program the LPCR via stop-api only if the deepest stop state 1049 * can lose hypervisor context. 1050 */ 1051 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) 1052 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 1053 } 1054 1055 /* 1056 * pnv_cpu_offline: A function that puts the CPU into the deepest 1057 * available platform idle state on a CPU-Offline. 1058 * interrupts hard disabled and no lazy irq pending. 1059 */ 1060 unsigned long pnv_cpu_offline(unsigned int cpu) 1061 { 1062 unsigned long srr1; 1063 1064 __ppc64_runlatch_off(); 1065 1066 if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) { 1067 unsigned long psscr; 1068 1069 psscr = mfspr(SPRN_PSSCR); 1070 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 1071 pnv_deepest_stop_psscr_val; 1072 srr1 = arch300_offline_stop(psscr); 1073 } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { 1074 srr1 = power7_offline(); 1075 } else { 1076 /* This is the fallback method. We emulate snooze */ 1077 while (!generic_check_cpu_restart(cpu)) { 1078 HMT_low(); 1079 HMT_very_low(); 1080 } 1081 srr1 = 0; 1082 HMT_medium(); 1083 } 1084 1085 __ppc64_runlatch_on(); 1086 1087 return srr1; 1088 } 1089 #endif 1090 1091 /* 1092 * Power ISA 3.0 idle initialization. 1093 * 1094 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control 1095 * Register (PSSCR) to control idle behavior. 1096 * 1097 * PSSCR layout: 1098 * ---------------------------------------------------------- 1099 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | 1100 * ---------------------------------------------------------- 1101 * 0 4 41 42 43 44 48 54 56 60 1102 * 1103 * PSSCR key fields: 1104 * Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the 1105 * lowest power-saving state the thread entered since stop instruction was 1106 * last executed. 1107 * 1108 * Bit 41 - Status Disable(SD) 1109 * 0 - Shows PLS entries 1110 * 1 - PLS entries are all 0 1111 * 1112 * Bit 42 - Enable State Loss 1113 * 0 - No state is lost irrespective of other fields 1114 * 1 - Allows state loss 1115 * 1116 * Bit 43 - Exit Criterion 1117 * 0 - Exit from power-save mode on any interrupt 1118 * 1 - Exit from power-save mode controlled by LPCR's PECE bits 1119 * 1120 * Bits 44:47 - Power-Saving Level Limit 1121 * This limits the power-saving level that can be entered into. 1122 * 1123 * Bits 60:63 - Requested Level 1124 * Used to specify which power-saving level must be entered on executing 1125 * stop instruction 1126 */ 1127 1128 int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 1129 { 1130 int err = 0; 1131 1132 /* 1133 * psscr_mask == 0xf indicates an older firmware. 1134 * Set remaining fields of psscr to the default values. 1135 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL 1136 */ 1137 if (*psscr_mask == 0xf) { 1138 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; 1139 *psscr_mask = PSSCR_HV_DEFAULT_MASK; 1140 return err; 1141 } 1142 1143 /* 1144 * New firmware is expected to set the psscr_val bits correctly. 1145 * Validate that the following invariants are correctly maintained by 1146 * the new firmware. 1147 * - ESL bit value matches the EC bit value. 1148 * - ESL bit is set for all the deep stop states. 1149 */ 1150 if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { 1151 err = ERR_EC_ESL_MISMATCH; 1152 } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1153 GET_PSSCR_ESL(*psscr_val) == 0) { 1154 err = ERR_DEEP_STATE_ESL_MISMATCH; 1155 } 1156 1157 return err; 1158 } 1159 1160 /* 1161 * pnv_arch300_idle_init: Initializes the default idle state, first 1162 * deep idle state and deepest idle state on 1163 * ISA 3.0 CPUs. 1164 * 1165 * @np: /ibm,opal/power-mgt device node 1166 * @flags: cpu-idle-state-flags array 1167 * @dt_idle_states: Number of idle state entries 1168 * Returns 0 on success 1169 */ 1170 static void __init pnv_arch300_idle_init(void) 1171 { 1172 u64 max_residency_ns = 0; 1173 int i; 1174 1175 /* stop is not really architected, we only have p9,p10 and p11 drivers */ 1176 if (!pvr_version_is(PVR_POWER9) && !pvr_version_is(PVR_POWER10) && 1177 !pvr_version_is(PVR_POWER11)) 1178 return; 1179 1180 /* 1181 * pnv_deepest_stop_{val,mask} should be set to values corresponding to 1182 * the deepest stop state. 1183 * 1184 * pnv_default_stop_{val,mask} should be set to values corresponding to 1185 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. 1186 */ 1187 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 1188 deep_spr_loss_state = MAX_STOP_STATE + 1; 1189 for (i = 0; i < nr_pnv_idle_states; i++) { 1190 int err; 1191 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1192 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1193 1194 /* No deep loss driver implemented for POWER10 and POWER11 yet */ 1195 if ((pvr_version_is(PVR_POWER10) || pvr_version_is(PVR_POWER11)) && 1196 state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) 1197 continue; 1198 1199 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1200 (pnv_first_tb_loss_level > psscr_rl)) 1201 pnv_first_tb_loss_level = psscr_rl; 1202 1203 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1204 (deep_spr_loss_state > psscr_rl)) 1205 deep_spr_loss_state = psscr_rl; 1206 1207 /* 1208 * The idle code does not deal with TB loss occurring 1209 * in a shallower state than SPR loss, so force it to 1210 * behave like SPRs are lost if TB is lost. POWER9 would 1211 * never encounter this, but a POWER8 core would if it 1212 * implemented the stop instruction. So this is for forward 1213 * compatibility. 1214 */ 1215 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1216 (deep_spr_loss_state > psscr_rl)) 1217 deep_spr_loss_state = psscr_rl; 1218 1219 err = validate_psscr_val_mask(&state->psscr_val, 1220 &state->psscr_mask, 1221 state->flags); 1222 if (err) { 1223 report_invalid_psscr_val(state->psscr_val, err); 1224 continue; 1225 } 1226 1227 state->valid = true; 1228 1229 if (max_residency_ns < state->residency_ns) { 1230 max_residency_ns = state->residency_ns; 1231 pnv_deepest_stop_psscr_val = state->psscr_val; 1232 pnv_deepest_stop_psscr_mask = state->psscr_mask; 1233 pnv_deepest_stop_flag = state->flags; 1234 deepest_stop_found = true; 1235 } 1236 1237 if (!default_stop_found && 1238 (state->flags & OPAL_PM_STOP_INST_FAST)) { 1239 pnv_default_stop_val = state->psscr_val; 1240 pnv_default_stop_mask = state->psscr_mask; 1241 default_stop_found = true; 1242 WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); 1243 } 1244 } 1245 1246 if (unlikely(!default_stop_found)) { 1247 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); 1248 } else { 1249 ppc_md.power_save = arch300_idle; 1250 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 1251 pnv_default_stop_val, pnv_default_stop_mask); 1252 } 1253 1254 if (unlikely(!deepest_stop_found)) { 1255 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait"); 1256 } else { 1257 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", 1258 pnv_deepest_stop_psscr_val, 1259 pnv_deepest_stop_psscr_mask); 1260 } 1261 1262 pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", 1263 deep_spr_loss_state); 1264 1265 pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", 1266 pnv_first_tb_loss_level); 1267 } 1268 1269 static void __init pnv_disable_deep_states(void) 1270 { 1271 /* 1272 * The stop-api is unable to restore hypervisor 1273 * resources on wakeup from platform idle states which 1274 * lose full context. So disable such states. 1275 */ 1276 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; 1277 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); 1278 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); 1279 1280 if (cpu_has_feature(CPU_FTR_ARCH_300) && 1281 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { 1282 /* 1283 * Use the default stop state for CPU-Hotplug 1284 * if available. 1285 */ 1286 if (default_stop_found) { 1287 pnv_deepest_stop_psscr_val = pnv_default_stop_val; 1288 pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; 1289 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", 1290 pnv_deepest_stop_psscr_val); 1291 } else { /* Fallback to snooze loop for CPU-Hotplug */ 1292 deepest_stop_found = false; 1293 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); 1294 } 1295 } 1296 } 1297 1298 /* 1299 * Probe device tree for supported idle states 1300 */ 1301 static void __init pnv_probe_idle_states(void) 1302 { 1303 int i; 1304 1305 if (nr_pnv_idle_states < 0) { 1306 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 1307 return; 1308 } 1309 1310 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1311 pnv_arch300_idle_init(); 1312 1313 for (i = 0; i < nr_pnv_idle_states; i++) 1314 supported_cpuidle_states |= pnv_idle_states[i].flags; 1315 } 1316 1317 /* 1318 * This function parses device-tree and populates all the information 1319 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states 1320 * which is the number of cpuidle states discovered through device-tree. 1321 */ 1322 1323 static int __init pnv_parse_cpuidle_dt(void) 1324 { 1325 struct device_node *np; 1326 int nr_idle_states, i; 1327 int rc = 0; 1328 u32 *temp_u32; 1329 u64 *temp_u64; 1330 const char **temp_string; 1331 1332 np = of_find_node_by_path("/ibm,opal/power-mgt"); 1333 if (!np) { 1334 pr_warn("opal: PowerMgmt Node not found\n"); 1335 return -ENODEV; 1336 } 1337 nr_idle_states = of_property_count_u32_elems(np, 1338 "ibm,cpu-idle-state-flags"); 1339 1340 pnv_idle_states = kzalloc_objs(*pnv_idle_states, nr_idle_states); 1341 temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); 1342 temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); 1343 temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); 1344 1345 if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { 1346 pr_err("Could not allocate memory for dt parsing\n"); 1347 rc = -ENOMEM; 1348 goto out; 1349 } 1350 1351 /* Read flags */ 1352 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", 1353 temp_u32, nr_idle_states)) { 1354 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 1355 rc = -EINVAL; 1356 goto out; 1357 } 1358 for (i = 0; i < nr_idle_states; i++) 1359 pnv_idle_states[i].flags = temp_u32[i]; 1360 1361 /* Read latencies */ 1362 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", 1363 temp_u32, nr_idle_states)) { 1364 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 1365 rc = -EINVAL; 1366 goto out; 1367 } 1368 for (i = 0; i < nr_idle_states; i++) 1369 pnv_idle_states[i].latency_ns = temp_u32[i]; 1370 1371 /* Read residencies */ 1372 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", 1373 temp_u32, nr_idle_states)) { 1374 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); 1375 rc = -EINVAL; 1376 goto out; 1377 } 1378 for (i = 0; i < nr_idle_states; i++) 1379 pnv_idle_states[i].residency_ns = temp_u32[i]; 1380 1381 /* For power9 and later */ 1382 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1383 /* Read pm_crtl_val */ 1384 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", 1385 temp_u64, nr_idle_states)) { 1386 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 1387 rc = -EINVAL; 1388 goto out; 1389 } 1390 for (i = 0; i < nr_idle_states; i++) 1391 pnv_idle_states[i].psscr_val = temp_u64[i]; 1392 1393 /* Read pm_crtl_mask */ 1394 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", 1395 temp_u64, nr_idle_states)) { 1396 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 1397 rc = -EINVAL; 1398 goto out; 1399 } 1400 for (i = 0; i < nr_idle_states; i++) 1401 pnv_idle_states[i].psscr_mask = temp_u64[i]; 1402 } 1403 1404 /* 1405 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and 1406 * ibm,cpu-idle-state-pmicr-val were never used and there is no 1407 * plan to use it in near future. Hence, not parsing these properties 1408 */ 1409 1410 if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", 1411 temp_string, nr_idle_states) < 0) { 1412 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); 1413 rc = -EINVAL; 1414 goto out; 1415 } 1416 for (i = 0; i < nr_idle_states; i++) 1417 strscpy(pnv_idle_states[i].name, temp_string[i], 1418 PNV_IDLE_NAME_LEN); 1419 nr_pnv_idle_states = nr_idle_states; 1420 rc = 0; 1421 out: 1422 kfree(temp_u32); 1423 kfree(temp_u64); 1424 kfree(temp_string); 1425 of_node_put(np); 1426 return rc; 1427 } 1428 1429 static int __init pnv_init_idle_states(void) 1430 { 1431 int cpu; 1432 int rc = 0; 1433 1434 /* Set up PACA fields */ 1435 for_each_present_cpu(cpu) { 1436 struct paca_struct *p = paca_ptrs[cpu]; 1437 1438 p->idle_state = 0; 1439 if (cpu == cpu_first_thread_sibling(cpu)) 1440 p->idle_state = (1 << threads_per_core) - 1; 1441 1442 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1443 /* P7/P8 nap */ 1444 p->thread_idle_state = PNV_THREAD_RUNNING; 1445 } else if (pvr_version_is(PVR_POWER9)) { 1446 /* P9 stop workarounds */ 1447 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1448 p->requested_psscr = 0; 1449 atomic_set(&p->dont_stop, 0); 1450 #endif 1451 } 1452 } 1453 1454 /* In case we error out nr_pnv_idle_states will be zero */ 1455 nr_pnv_idle_states = 0; 1456 supported_cpuidle_states = 0; 1457 1458 if (cpuidle_disable != IDLE_NO_OVERRIDE) 1459 goto out; 1460 rc = pnv_parse_cpuidle_dt(); 1461 if (rc) 1462 return rc; 1463 pnv_probe_idle_states(); 1464 1465 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1466 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 1467 power7_fastsleep_workaround_entry = false; 1468 power7_fastsleep_workaround_exit = false; 1469 } else { 1470 struct device *dev_root; 1471 /* 1472 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 1473 * workaround is needed to use fastsleep. Provide sysfs 1474 * control to choose how this workaround has to be 1475 * applied. 1476 */ 1477 dev_root = bus_get_dev_root(&cpu_subsys); 1478 if (dev_root) { 1479 device_create_file(dev_root, 1480 &dev_attr_fastsleep_workaround_applyonce); 1481 put_device(dev_root); 1482 } 1483 } 1484 1485 update_subcore_sibling_mask(); 1486 1487 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { 1488 ppc_md.power_save = power7_idle; 1489 power7_offline_type = PNV_THREAD_NAP; 1490 } 1491 1492 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && 1493 (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) 1494 power7_offline_type = PNV_THREAD_WINKLE; 1495 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || 1496 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 1497 power7_offline_type = PNV_THREAD_SLEEP; 1498 } 1499 1500 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { 1501 if (pnv_save_sprs_for_deep_states()) 1502 pnv_disable_deep_states(); 1503 } 1504 1505 out: 1506 return 0; 1507 } 1508 machine_subsys_initcall(powernv, pnv_init_idle_states); 1509