1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * PowerNV cpuidle code 4 * 5 * Copyright 2015 IBM Corp. 6 */ 7 8 #include <linux/types.h> 9 #include <linux/mm.h> 10 #include <linux/slab.h> 11 #include <linux/of.h> 12 #include <linux/device.h> 13 #include <linux/cpu.h> 14 15 #include <asm/firmware.h> 16 #include <asm/interrupt.h> 17 #include <asm/machdep.h> 18 #include <asm/opal.h> 19 #include <asm/cputhreads.h> 20 #include <asm/cpuidle.h> 21 #include <asm/text-patching.h> 22 #include <asm/smp.h> 23 #include <asm/runlatch.h> 24 #include <asm/dbell.h> 25 26 #include "powernv.h" 27 #include "subcore.h" 28 29 /* Power ISA 3.0 allows for stop states 0x0 - 0xF */ 30 #define MAX_STOP_STATE 0xF 31 32 #define P9_STOP_SPR_MSR 2000 33 #define P9_STOP_SPR_PSSCR 855 34 35 static u32 supported_cpuidle_states; 36 struct pnv_idle_states_t *pnv_idle_states; 37 int nr_pnv_idle_states; 38 39 /* 40 * The default stop state that will be used by ppc_md.power_save 41 * function on platforms that support stop instruction. 42 */ 43 static u64 pnv_default_stop_val; 44 static u64 pnv_default_stop_mask; 45 static bool default_stop_found; 46 47 /* 48 * First stop state levels when SPR and TB loss can occur. 49 */ 50 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 51 static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; 52 53 /* 54 * psscr value and mask of the deepest stop idle state. 55 * Used when a cpu is offlined. 56 */ 57 static u64 pnv_deepest_stop_psscr_val; 58 static u64 pnv_deepest_stop_psscr_mask; 59 static u64 pnv_deepest_stop_flag; 60 static bool deepest_stop_found; 61 62 static unsigned long power7_offline_type; 63 64 static int __init pnv_save_sprs_for_deep_states(void) 65 { 66 int cpu; 67 int rc; 68 69 /* 70 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across 71 * all cpus at boot. Get these reg values of current cpu and use the 72 * same across all cpus. 73 */ 74 uint64_t lpcr_val = mfspr(SPRN_LPCR); 75 uint64_t hid0_val = mfspr(SPRN_HID0); 76 uint64_t hmeer_val = mfspr(SPRN_HMEER); 77 uint64_t msr_val = MSR_IDLE; 78 uint64_t psscr_val = pnv_deepest_stop_psscr_val; 79 80 for_each_present_cpu(cpu) { 81 uint64_t pir = get_hard_smp_processor_id(cpu); 82 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; 83 84 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 85 if (rc != 0) 86 return rc; 87 88 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 89 if (rc != 0) 90 return rc; 91 92 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 93 rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); 94 if (rc) 95 return rc; 96 97 rc = opal_slw_set_reg(pir, 98 P9_STOP_SPR_PSSCR, psscr_val); 99 100 if (rc) 101 return rc; 102 } 103 104 /* HIDs are per core registers */ 105 if (cpu_thread_in_core(cpu) == 0) { 106 107 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 108 if (rc != 0) 109 return rc; 110 111 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 112 if (rc != 0) 113 return rc; 114 115 /* Only p8 needs to set extra HID registers */ 116 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 117 uint64_t hid1_val = mfspr(SPRN_HID1); 118 uint64_t hid4_val = mfspr(SPRN_HID4); 119 uint64_t hid5_val = mfspr(SPRN_HID5); 120 121 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 122 if (rc != 0) 123 return rc; 124 125 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 126 if (rc != 0) 127 return rc; 128 129 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 130 if (rc != 0) 131 return rc; 132 } 133 } 134 } 135 136 return 0; 137 } 138 139 u32 pnv_get_supported_cpuidle_states(void) 140 { 141 return supported_cpuidle_states; 142 } 143 EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 144 145 static void pnv_fastsleep_workaround_apply(void *info) 146 147 { 148 int cpu = smp_processor_id(); 149 int rc; 150 int *err = info; 151 152 if (cpu_first_thread_sibling(cpu) != cpu) 153 return; 154 155 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 156 OPAL_CONFIG_IDLE_APPLY); 157 if (rc) 158 *err = 1; 159 } 160 161 static bool power7_fastsleep_workaround_entry = true; 162 static bool power7_fastsleep_workaround_exit = true; 163 164 /* 165 * Used to store fastsleep workaround state 166 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 167 * 1 - Workaround applied once, never undone. 168 */ 169 static u8 fastsleep_workaround_applyonce; 170 171 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, 172 struct device_attribute *attr, char *buf) 173 { 174 return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); 175 } 176 177 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, 178 struct device_attribute *attr, const char *buf, 179 size_t count) 180 { 181 int err; 182 u8 val; 183 184 if (kstrtou8(buf, 0, &val) || val != 1) 185 return -EINVAL; 186 187 if (fastsleep_workaround_applyonce == 1) 188 return count; 189 190 /* 191 * fastsleep_workaround_applyonce = 1 implies 192 * fastsleep workaround needs to be left in 'applied' state on all 193 * the cores. Do this by- 194 * 1. Disable the 'undo' workaround in fastsleep exit path 195 * 2. Sendi IPIs to all the cores which have at least one online thread 196 * 3. Disable the 'apply' workaround in fastsleep entry path 197 * 198 * There is no need to send ipi to cores which have all threads 199 * offlined, as last thread of the core entering fastsleep or deeper 200 * state would have applied workaround. 201 */ 202 power7_fastsleep_workaround_exit = false; 203 204 cpus_read_lock(); 205 on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); 206 cpus_read_unlock(); 207 if (err) { 208 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); 209 goto fail; 210 } 211 212 power7_fastsleep_workaround_entry = false; 213 214 fastsleep_workaround_applyonce = 1; 215 216 return count; 217 fail: 218 return -EIO; 219 } 220 221 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, 222 show_fastsleep_workaround_applyonce, 223 store_fastsleep_workaround_applyonce); 224 225 static inline void atomic_start_thread_idle(void) 226 { 227 int cpu = raw_smp_processor_id(); 228 int first = cpu_first_thread_sibling(cpu); 229 int thread_nr = cpu_thread_in_core(cpu); 230 unsigned long *state = &paca_ptrs[first]->idle_state; 231 232 clear_bit(thread_nr, state); 233 } 234 235 static inline void atomic_stop_thread_idle(void) 236 { 237 int cpu = raw_smp_processor_id(); 238 int first = cpu_first_thread_sibling(cpu); 239 int thread_nr = cpu_thread_in_core(cpu); 240 unsigned long *state = &paca_ptrs[first]->idle_state; 241 242 set_bit(thread_nr, state); 243 } 244 245 static inline void atomic_lock_thread_idle(void) 246 { 247 int cpu = raw_smp_processor_id(); 248 int first = cpu_first_thread_sibling(cpu); 249 unsigned long *lock = &paca_ptrs[first]->idle_lock; 250 251 while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock))) 252 barrier(); 253 } 254 255 static inline void atomic_unlock_and_stop_thread_idle(void) 256 { 257 int cpu = raw_smp_processor_id(); 258 int first = cpu_first_thread_sibling(cpu); 259 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 260 unsigned long *state = &paca_ptrs[first]->idle_state; 261 unsigned long *lock = &paca_ptrs[first]->idle_lock; 262 u64 s = READ_ONCE(*state); 263 u64 new, tmp; 264 265 BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT)); 266 BUG_ON(s & thread); 267 268 again: 269 new = s | thread; 270 tmp = cmpxchg(state, s, new); 271 if (unlikely(tmp != s)) { 272 s = tmp; 273 goto again; 274 } 275 clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); 276 } 277 278 static inline void atomic_unlock_thread_idle(void) 279 { 280 int cpu = raw_smp_processor_id(); 281 int first = cpu_first_thread_sibling(cpu); 282 unsigned long *lock = &paca_ptrs[first]->idle_lock; 283 284 BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock)); 285 clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock); 286 } 287 288 /* P7 and P8 */ 289 struct p7_sprs { 290 /* per core */ 291 u64 tscr; 292 u64 worc; 293 294 /* per subcore */ 295 u64 sdr1; 296 u64 rpr; 297 298 /* per thread */ 299 u64 lpcr; 300 u64 hfscr; 301 u64 fscr; 302 u64 purr; 303 u64 spurr; 304 u64 dscr; 305 u64 wort; 306 307 /* per thread SPRs that get lost in shallow states */ 308 u64 amr; 309 u64 iamr; 310 u64 uamor; 311 /* amor is restored to constant ~0 */ 312 }; 313 314 static unsigned long power7_idle_insn(unsigned long type) 315 { 316 int cpu = raw_smp_processor_id(); 317 int first = cpu_first_thread_sibling(cpu); 318 unsigned long *state = &paca_ptrs[first]->idle_state; 319 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 320 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 321 unsigned long srr1; 322 bool full_winkle; 323 struct p7_sprs sprs = {}; /* avoid false use-uninitialised */ 324 bool sprs_saved = false; 325 int rc; 326 327 if (unlikely(type != PNV_THREAD_NAP)) { 328 atomic_lock_thread_idle(); 329 330 BUG_ON(!(*state & thread)); 331 *state &= ~thread; 332 333 if (power7_fastsleep_workaround_entry) { 334 if ((*state & core_thread_mask) == 0) { 335 rc = opal_config_cpu_idle_state( 336 OPAL_CONFIG_IDLE_FASTSLEEP, 337 OPAL_CONFIG_IDLE_APPLY); 338 BUG_ON(rc); 339 } 340 } 341 342 if (type == PNV_THREAD_WINKLE) { 343 sprs.tscr = mfspr(SPRN_TSCR); 344 sprs.worc = mfspr(SPRN_WORC); 345 346 sprs.sdr1 = mfspr(SPRN_SDR1); 347 sprs.rpr = mfspr(SPRN_RPR); 348 349 sprs.lpcr = mfspr(SPRN_LPCR); 350 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 351 sprs.hfscr = mfspr(SPRN_HFSCR); 352 sprs.fscr = mfspr(SPRN_FSCR); 353 } 354 sprs.purr = mfspr(SPRN_PURR); 355 sprs.spurr = mfspr(SPRN_SPURR); 356 sprs.dscr = mfspr(SPRN_DSCR); 357 sprs.wort = mfspr(SPRN_WORT); 358 359 sprs_saved = true; 360 361 /* 362 * Increment winkle counter and set all winkle bits if 363 * all threads are winkling. This allows wakeup side to 364 * distinguish between fast sleep and winkle state 365 * loss. Fast sleep still has to resync the timebase so 366 * this may not be a really big win. 367 */ 368 *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 369 if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) 370 >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 371 == threads_per_core) 372 *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS; 373 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 374 } 375 376 atomic_unlock_thread_idle(); 377 } 378 379 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 380 sprs.amr = mfspr(SPRN_AMR); 381 sprs.iamr = mfspr(SPRN_IAMR); 382 sprs.uamor = mfspr(SPRN_UAMOR); 383 } 384 385 local_paca->thread_idle_state = type; 386 srr1 = isa206_idle_insn_mayloss(type); /* go idle */ 387 local_paca->thread_idle_state = PNV_THREAD_RUNNING; 388 389 WARN_ON_ONCE(!srr1); 390 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 391 392 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 393 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { 394 /* 395 * We don't need an isync after the mtsprs here because 396 * the upcoming mtmsrd is execution synchronizing. 397 */ 398 mtspr(SPRN_AMR, sprs.amr); 399 mtspr(SPRN_IAMR, sprs.iamr); 400 mtspr(SPRN_AMOR, ~0); 401 mtspr(SPRN_UAMOR, sprs.uamor); 402 } 403 } 404 405 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 406 hmi_exception_realmode(NULL); 407 408 if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) { 409 if (unlikely(type != PNV_THREAD_NAP)) { 410 atomic_lock_thread_idle(); 411 if (type == PNV_THREAD_WINKLE) { 412 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 413 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 414 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); 415 } 416 atomic_unlock_and_stop_thread_idle(); 417 } 418 return srr1; 419 } 420 421 /* HV state loss */ 422 BUG_ON(type == PNV_THREAD_NAP); 423 424 atomic_lock_thread_idle(); 425 426 full_winkle = false; 427 if (type == PNV_THREAD_WINKLE) { 428 WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); 429 *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; 430 if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) { 431 *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); 432 full_winkle = true; 433 BUG_ON(!sprs_saved); 434 } 435 } 436 437 WARN_ON(*state & thread); 438 439 if ((*state & core_thread_mask) != 0) 440 goto core_woken; 441 442 /* Per-core SPRs */ 443 if (full_winkle) { 444 mtspr(SPRN_TSCR, sprs.tscr); 445 mtspr(SPRN_WORC, sprs.worc); 446 } 447 448 if (power7_fastsleep_workaround_exit) { 449 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 450 OPAL_CONFIG_IDLE_UNDO); 451 BUG_ON(rc); 452 } 453 454 /* TB */ 455 if (opal_resync_timebase() != OPAL_SUCCESS) 456 BUG(); 457 458 core_woken: 459 if (!full_winkle) 460 goto subcore_woken; 461 462 if ((*state & local_paca->subcore_sibling_mask) != 0) 463 goto subcore_woken; 464 465 /* Per-subcore SPRs */ 466 mtspr(SPRN_SDR1, sprs.sdr1); 467 mtspr(SPRN_RPR, sprs.rpr); 468 469 subcore_woken: 470 /* 471 * isync after restoring shared SPRs and before unlocking. Unlock 472 * only contains hwsync which does not necessarily do the right 473 * thing for SPRs. 474 */ 475 isync(); 476 atomic_unlock_and_stop_thread_idle(); 477 478 /* Fast sleep does not lose SPRs */ 479 if (!full_winkle) 480 return srr1; 481 482 /* Per-thread SPRs */ 483 mtspr(SPRN_LPCR, sprs.lpcr); 484 if (cpu_has_feature(CPU_FTR_ARCH_207S)) { 485 mtspr(SPRN_HFSCR, sprs.hfscr); 486 mtspr(SPRN_FSCR, sprs.fscr); 487 } 488 mtspr(SPRN_PURR, sprs.purr); 489 mtspr(SPRN_SPURR, sprs.spurr); 490 mtspr(SPRN_DSCR, sprs.dscr); 491 mtspr(SPRN_WORT, sprs.wort); 492 493 mtspr(SPRN_SPRG3, local_paca->sprg_vdso); 494 495 #ifdef CONFIG_PPC_64S_HASH_MMU 496 /* 497 * The SLB has to be restored here, but it sometimes still 498 * contains entries, so the __ variant must be used to prevent 499 * multi hits. 500 */ 501 __slb_restore_bolted_realmode(); 502 #endif 503 504 return srr1; 505 } 506 507 extern unsigned long idle_kvm_start_guest(unsigned long srr1); 508 509 #ifdef CONFIG_HOTPLUG_CPU 510 static unsigned long power7_offline(void) 511 { 512 unsigned long srr1; 513 514 mtmsr(MSR_IDLE); 515 516 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 517 /* Tell KVM we're entering idle. */ 518 /******************************************************/ 519 /* N O T E W E L L ! ! ! N O T E W E L L */ 520 /* The following store to HSTATE_HWTHREAD_STATE(r13) */ 521 /* MUST occur in real mode, i.e. with the MMU off, */ 522 /* and the MMU must stay off until we clear this flag */ 523 /* and test HSTATE_HWTHREAD_REQ(r13) in */ 524 /* pnv_powersave_wakeup in this file. */ 525 /* The reason is that another thread can switch the */ 526 /* MMU to a guest context whenever this flag is set */ 527 /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ 528 /* that would potentially cause this thread to start */ 529 /* executing instructions from guest memory in */ 530 /* hypervisor mode, leading to a host crash or data */ 531 /* corruption, or worse. */ 532 /******************************************************/ 533 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; 534 #endif 535 536 __ppc64_runlatch_off(); 537 srr1 = power7_idle_insn(power7_offline_type); 538 __ppc64_runlatch_on(); 539 540 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 541 local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; 542 /* Order setting hwthread_state vs. testing hwthread_req */ 543 smp_mb(); 544 if (local_paca->kvm_hstate.hwthread_req) 545 srr1 = idle_kvm_start_guest(srr1); 546 #endif 547 548 mtmsr(MSR_KERNEL); 549 550 return srr1; 551 } 552 #endif 553 554 void power7_idle_type(unsigned long type) 555 { 556 unsigned long srr1; 557 558 if (!prep_irq_for_idle_irqsoff()) 559 return; 560 561 mtmsr(MSR_IDLE); 562 __ppc64_runlatch_off(); 563 srr1 = power7_idle_insn(type); 564 __ppc64_runlatch_on(); 565 mtmsr(MSR_KERNEL); 566 567 fini_irq_for_idle_irqsoff(); 568 irq_set_pending_from_srr1(srr1); 569 } 570 571 static void power7_idle(void) 572 { 573 if (!powersave_nap) 574 return; 575 576 power7_idle_type(PNV_THREAD_NAP); 577 } 578 579 struct p9_sprs { 580 /* per core */ 581 u64 ptcr; 582 u64 rpr; 583 u64 tscr; 584 u64 ldbar; 585 586 /* per thread */ 587 u64 lpcr; 588 u64 hfscr; 589 u64 fscr; 590 u64 pid; 591 u64 purr; 592 u64 spurr; 593 u64 dscr; 594 u64 ciabr; 595 596 u64 mmcra; 597 u32 mmcr0; 598 u32 mmcr1; 599 u64 mmcr2; 600 601 /* per thread SPRs that get lost in shallow states */ 602 u64 amr; 603 u64 iamr; 604 u64 amor; 605 u64 uamor; 606 }; 607 608 static unsigned long power9_idle_stop(unsigned long psscr) 609 { 610 int cpu = raw_smp_processor_id(); 611 int first = cpu_first_thread_sibling(cpu); 612 unsigned long *state = &paca_ptrs[first]->idle_state; 613 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 614 unsigned long srr1; 615 unsigned long pls; 616 unsigned long mmcr0 = 0; 617 unsigned long mmcra = 0; 618 struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ 619 bool sprs_saved = false; 620 621 if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { 622 /* EC=ESL=0 case */ 623 624 /* 625 * Wake synchronously. SRESET via xscom may still cause 626 * a 0x100 powersave wakeup with SRR1 reason! 627 */ 628 srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ 629 if (likely(!srr1)) 630 return 0; 631 632 /* 633 * Registers not saved, can't recover! 634 * This would be a hardware bug 635 */ 636 BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); 637 638 goto out; 639 } 640 641 /* EC=ESL=1 case */ 642 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 643 if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) { 644 local_paca->requested_psscr = psscr; 645 /* order setting requested_psscr vs testing dont_stop */ 646 smp_mb(); 647 if (atomic_read(&local_paca->dont_stop)) { 648 local_paca->requested_psscr = 0; 649 return 0; 650 } 651 } 652 #endif 653 654 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { 655 /* 656 * POWER9 DD2 can incorrectly set PMAO when waking up 657 * after a state-loss idle. Saving and restoring MMCR0 658 * over idle is a workaround. 659 */ 660 mmcr0 = mfspr(SPRN_MMCR0); 661 } 662 663 if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { 664 sprs.lpcr = mfspr(SPRN_LPCR); 665 sprs.hfscr = mfspr(SPRN_HFSCR); 666 sprs.fscr = mfspr(SPRN_FSCR); 667 sprs.pid = mfspr(SPRN_PID); 668 sprs.purr = mfspr(SPRN_PURR); 669 sprs.spurr = mfspr(SPRN_SPURR); 670 sprs.dscr = mfspr(SPRN_DSCR); 671 sprs.ciabr = mfspr(SPRN_CIABR); 672 673 sprs.mmcra = mfspr(SPRN_MMCRA); 674 sprs.mmcr0 = mfspr(SPRN_MMCR0); 675 sprs.mmcr1 = mfspr(SPRN_MMCR1); 676 sprs.mmcr2 = mfspr(SPRN_MMCR2); 677 678 sprs.ptcr = mfspr(SPRN_PTCR); 679 sprs.rpr = mfspr(SPRN_RPR); 680 sprs.tscr = mfspr(SPRN_TSCR); 681 if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) 682 sprs.ldbar = mfspr(SPRN_LDBAR); 683 684 sprs_saved = true; 685 686 atomic_start_thread_idle(); 687 } 688 689 sprs.amr = mfspr(SPRN_AMR); 690 sprs.iamr = mfspr(SPRN_IAMR); 691 sprs.uamor = mfspr(SPRN_UAMOR); 692 693 srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ 694 695 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 696 local_paca->requested_psscr = 0; 697 #endif 698 699 psscr = mfspr(SPRN_PSSCR); 700 701 WARN_ON_ONCE(!srr1); 702 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 703 704 if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { 705 /* 706 * We don't need an isync after the mtsprs here because the 707 * upcoming mtmsrd is execution synchronizing. 708 */ 709 mtspr(SPRN_AMR, sprs.amr); 710 mtspr(SPRN_IAMR, sprs.iamr); 711 mtspr(SPRN_AMOR, ~0); 712 mtspr(SPRN_UAMOR, sprs.uamor); 713 714 /* 715 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT 716 * might have been corrupted and needs flushing. We also need 717 * to reload MMCR0 (see mmcr0 comment above). 718 */ 719 if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { 720 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT); 721 mtspr(SPRN_MMCR0, mmcr0); 722 } 723 724 /* 725 * DD2.2 and earlier need to set then clear bit 60 in MMCRA 726 * to ensure the PMU starts running. 727 */ 728 mmcra = mfspr(SPRN_MMCRA); 729 mmcra |= PPC_BIT(60); 730 mtspr(SPRN_MMCRA, mmcra); 731 mmcra &= ~PPC_BIT(60); 732 mtspr(SPRN_MMCRA, mmcra); 733 } 734 735 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 736 hmi_exception_realmode(NULL); 737 738 /* 739 * On POWER9, SRR1 bits do not match exactly as expected. 740 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so 741 * just always test PSSCR for SPR/TB state loss. 742 */ 743 pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; 744 if (likely(pls < deep_spr_loss_state)) { 745 if (sprs_saved) 746 atomic_stop_thread_idle(); 747 goto out; 748 } 749 750 /* HV state loss */ 751 BUG_ON(!sprs_saved); 752 753 atomic_lock_thread_idle(); 754 755 if ((*state & core_thread_mask) != 0) 756 goto core_woken; 757 758 /* Per-core SPRs */ 759 mtspr(SPRN_PTCR, sprs.ptcr); 760 mtspr(SPRN_RPR, sprs.rpr); 761 mtspr(SPRN_TSCR, sprs.tscr); 762 763 if (pls >= pnv_first_tb_loss_level) { 764 /* TB loss */ 765 if (opal_resync_timebase() != OPAL_SUCCESS) 766 BUG(); 767 } 768 769 /* 770 * isync after restoring shared SPRs and before unlocking. Unlock 771 * only contains hwsync which does not necessarily do the right 772 * thing for SPRs. 773 */ 774 isync(); 775 776 core_woken: 777 atomic_unlock_and_stop_thread_idle(); 778 779 /* Per-thread SPRs */ 780 mtspr(SPRN_LPCR, sprs.lpcr); 781 mtspr(SPRN_HFSCR, sprs.hfscr); 782 mtspr(SPRN_FSCR, sprs.fscr); 783 mtspr(SPRN_PID, sprs.pid); 784 mtspr(SPRN_PURR, sprs.purr); 785 mtspr(SPRN_SPURR, sprs.spurr); 786 mtspr(SPRN_DSCR, sprs.dscr); 787 mtspr(SPRN_CIABR, sprs.ciabr); 788 789 mtspr(SPRN_MMCRA, sprs.mmcra); 790 mtspr(SPRN_MMCR0, sprs.mmcr0); 791 mtspr(SPRN_MMCR1, sprs.mmcr1); 792 mtspr(SPRN_MMCR2, sprs.mmcr2); 793 if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) 794 mtspr(SPRN_LDBAR, sprs.ldbar); 795 796 mtspr(SPRN_SPRG3, local_paca->sprg_vdso); 797 798 if (!radix_enabled()) 799 __slb_restore_bolted_realmode(); 800 801 out: 802 mtmsr(MSR_KERNEL); 803 804 return srr1; 805 } 806 807 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 808 /* 809 * This is used in working around bugs in thread reconfiguration 810 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional 811 * memory and the way that XER[SO] is checkpointed. 812 * This function forces the core into SMT4 in order by asking 813 * all other threads not to stop, and sending a message to any 814 * that are in a stop state. 815 * Must be called with preemption disabled. 816 */ 817 void pnv_power9_force_smt4_catch(void) 818 { 819 int cpu, cpu0, thr; 820 int awake_threads = 1; /* this thread is awake */ 821 int poke_threads = 0; 822 int need_awake = threads_per_core; 823 824 cpu = smp_processor_id(); 825 cpu0 = cpu & ~(threads_per_core - 1); 826 for (thr = 0; thr < threads_per_core; ++thr) { 827 if (cpu != cpu0 + thr) 828 atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); 829 } 830 /* order setting dont_stop vs testing requested_psscr */ 831 smp_mb(); 832 for (thr = 0; thr < threads_per_core; ++thr) { 833 if (!paca_ptrs[cpu0+thr]->requested_psscr) 834 ++awake_threads; 835 else 836 poke_threads |= (1 << thr); 837 } 838 839 /* If at least 3 threads are awake, the core is in SMT4 already */ 840 if (awake_threads < need_awake) { 841 /* We have to wake some threads; we'll use msgsnd */ 842 for (thr = 0; thr < threads_per_core; ++thr) { 843 if (poke_threads & (1 << thr)) { 844 ppc_msgsnd_sync(); 845 ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, 846 paca_ptrs[cpu0+thr]->hw_cpu_id); 847 } 848 } 849 /* now spin until at least 3 threads are awake */ 850 do { 851 for (thr = 0; thr < threads_per_core; ++thr) { 852 if ((poke_threads & (1 << thr)) && 853 !paca_ptrs[cpu0+thr]->requested_psscr) { 854 ++awake_threads; 855 poke_threads &= ~(1 << thr); 856 } 857 } 858 } while (awake_threads < need_awake); 859 } 860 } 861 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch); 862 863 void pnv_power9_force_smt4_release(void) 864 { 865 int cpu, cpu0, thr; 866 867 cpu = smp_processor_id(); 868 cpu0 = cpu & ~(threads_per_core - 1); 869 870 /* clear all the dont_stop flags */ 871 for (thr = 0; thr < threads_per_core; ++thr) { 872 if (cpu != cpu0 + thr) 873 atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop); 874 } 875 } 876 EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release); 877 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 878 879 struct p10_sprs { 880 /* 881 * SPRs that get lost in shallow states: 882 * 883 * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1 884 * isa300 idle routines restore CR, LR. 885 * CTR is volatile 886 * idle thread doesn't use FP or VEC 887 * kernel doesn't use TAR 888 * HSPRG1 is only live in HV interrupt entry 889 * SPRG2 is only live in KVM guests, KVM handles it. 890 */ 891 }; 892 893 static unsigned long power10_idle_stop(unsigned long psscr) 894 { 895 int cpu = raw_smp_processor_id(); 896 int first = cpu_first_thread_sibling(cpu); 897 unsigned long *state = &paca_ptrs[first]->idle_state; 898 unsigned long core_thread_mask = (1UL << threads_per_core) - 1; 899 unsigned long srr1; 900 unsigned long pls; 901 // struct p10_sprs sprs = {}; /* avoid false used-uninitialised */ 902 bool sprs_saved = false; 903 904 if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { 905 /* EC=ESL=0 case */ 906 907 /* 908 * Wake synchronously. SRESET via xscom may still cause 909 * a 0x100 powersave wakeup with SRR1 reason! 910 */ 911 srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ 912 if (likely(!srr1)) 913 return 0; 914 915 /* 916 * Registers not saved, can't recover! 917 * This would be a hardware bug 918 */ 919 BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); 920 921 goto out; 922 } 923 924 /* EC=ESL=1 case */ 925 if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) { 926 /* XXX: save SPRs for deep state loss here. */ 927 928 sprs_saved = true; 929 930 atomic_start_thread_idle(); 931 } 932 933 srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ 934 935 psscr = mfspr(SPRN_PSSCR); 936 937 WARN_ON_ONCE(!srr1); 938 WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); 939 940 if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) 941 hmi_exception_realmode(NULL); 942 943 /* 944 * On POWER10, SRR1 bits do not match exactly as expected. 945 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so 946 * just always test PSSCR for SPR/TB state loss. 947 */ 948 pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; 949 if (likely(pls < deep_spr_loss_state)) { 950 if (sprs_saved) 951 atomic_stop_thread_idle(); 952 goto out; 953 } 954 955 /* HV state loss */ 956 BUG_ON(!sprs_saved); 957 958 atomic_lock_thread_idle(); 959 960 if ((*state & core_thread_mask) != 0) 961 goto core_woken; 962 963 /* XXX: restore per-core SPRs here */ 964 965 if (pls >= pnv_first_tb_loss_level) { 966 /* TB loss */ 967 if (opal_resync_timebase() != OPAL_SUCCESS) 968 BUG(); 969 } 970 971 /* 972 * isync after restoring shared SPRs and before unlocking. Unlock 973 * only contains hwsync which does not necessarily do the right 974 * thing for SPRs. 975 */ 976 isync(); 977 978 core_woken: 979 atomic_unlock_and_stop_thread_idle(); 980 981 /* XXX: restore per-thread SPRs here */ 982 983 if (!radix_enabled()) 984 __slb_restore_bolted_realmode(); 985 986 out: 987 mtmsr(MSR_KERNEL); 988 989 return srr1; 990 } 991 992 #ifdef CONFIG_HOTPLUG_CPU 993 static unsigned long arch300_offline_stop(unsigned long psscr) 994 { 995 unsigned long srr1; 996 997 if (cpu_has_feature(CPU_FTR_ARCH_31)) 998 srr1 = power10_idle_stop(psscr); 999 else 1000 srr1 = power9_idle_stop(psscr); 1001 1002 return srr1; 1003 } 1004 #endif 1005 1006 void arch300_idle_type(unsigned long stop_psscr_val, 1007 unsigned long stop_psscr_mask) 1008 { 1009 unsigned long psscr; 1010 unsigned long srr1; 1011 1012 if (!prep_irq_for_idle_irqsoff()) 1013 return; 1014 1015 psscr = mfspr(SPRN_PSSCR); 1016 psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; 1017 1018 __ppc64_runlatch_off(); 1019 if (cpu_has_feature(CPU_FTR_ARCH_31)) 1020 srr1 = power10_idle_stop(psscr); 1021 else 1022 srr1 = power9_idle_stop(psscr); 1023 __ppc64_runlatch_on(); 1024 1025 fini_irq_for_idle_irqsoff(); 1026 1027 irq_set_pending_from_srr1(srr1); 1028 } 1029 1030 /* 1031 * Used for ppc_md.power_save which needs a function with no parameters 1032 */ 1033 static void arch300_idle(void) 1034 { 1035 arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask); 1036 } 1037 1038 #ifdef CONFIG_HOTPLUG_CPU 1039 1040 void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) 1041 { 1042 u64 pir = get_hard_smp_processor_id(cpu); 1043 1044 mtspr(SPRN_LPCR, lpcr_val); 1045 1046 /* 1047 * Program the LPCR via stop-api only if the deepest stop state 1048 * can lose hypervisor context. 1049 */ 1050 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) 1051 opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 1052 } 1053 1054 /* 1055 * pnv_cpu_offline: A function that puts the CPU into the deepest 1056 * available platform idle state on a CPU-Offline. 1057 * interrupts hard disabled and no lazy irq pending. 1058 */ 1059 unsigned long pnv_cpu_offline(unsigned int cpu) 1060 { 1061 unsigned long srr1; 1062 1063 __ppc64_runlatch_off(); 1064 1065 if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) { 1066 unsigned long psscr; 1067 1068 psscr = mfspr(SPRN_PSSCR); 1069 psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | 1070 pnv_deepest_stop_psscr_val; 1071 srr1 = arch300_offline_stop(psscr); 1072 } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { 1073 srr1 = power7_offline(); 1074 } else { 1075 /* This is the fallback method. We emulate snooze */ 1076 while (!generic_check_cpu_restart(cpu)) { 1077 HMT_low(); 1078 HMT_very_low(); 1079 } 1080 srr1 = 0; 1081 HMT_medium(); 1082 } 1083 1084 __ppc64_runlatch_on(); 1085 1086 return srr1; 1087 } 1088 #endif 1089 1090 /* 1091 * Power ISA 3.0 idle initialization. 1092 * 1093 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control 1094 * Register (PSSCR) to control idle behavior. 1095 * 1096 * PSSCR layout: 1097 * ---------------------------------------------------------- 1098 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | 1099 * ---------------------------------------------------------- 1100 * 0 4 41 42 43 44 48 54 56 60 1101 * 1102 * PSSCR key fields: 1103 * Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the 1104 * lowest power-saving state the thread entered since stop instruction was 1105 * last executed. 1106 * 1107 * Bit 41 - Status Disable(SD) 1108 * 0 - Shows PLS entries 1109 * 1 - PLS entries are all 0 1110 * 1111 * Bit 42 - Enable State Loss 1112 * 0 - No state is lost irrespective of other fields 1113 * 1 - Allows state loss 1114 * 1115 * Bit 43 - Exit Criterion 1116 * 0 - Exit from power-save mode on any interrupt 1117 * 1 - Exit from power-save mode controlled by LPCR's PECE bits 1118 * 1119 * Bits 44:47 - Power-Saving Level Limit 1120 * This limits the power-saving level that can be entered into. 1121 * 1122 * Bits 60:63 - Requested Level 1123 * Used to specify which power-saving level must be entered on executing 1124 * stop instruction 1125 */ 1126 1127 int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 1128 { 1129 int err = 0; 1130 1131 /* 1132 * psscr_mask == 0xf indicates an older firmware. 1133 * Set remaining fields of psscr to the default values. 1134 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL 1135 */ 1136 if (*psscr_mask == 0xf) { 1137 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; 1138 *psscr_mask = PSSCR_HV_DEFAULT_MASK; 1139 return err; 1140 } 1141 1142 /* 1143 * New firmware is expected to set the psscr_val bits correctly. 1144 * Validate that the following invariants are correctly maintained by 1145 * the new firmware. 1146 * - ESL bit value matches the EC bit value. 1147 * - ESL bit is set for all the deep stop states. 1148 */ 1149 if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { 1150 err = ERR_EC_ESL_MISMATCH; 1151 } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1152 GET_PSSCR_ESL(*psscr_val) == 0) { 1153 err = ERR_DEEP_STATE_ESL_MISMATCH; 1154 } 1155 1156 return err; 1157 } 1158 1159 /* 1160 * pnv_arch300_idle_init: Initializes the default idle state, first 1161 * deep idle state and deepest idle state on 1162 * ISA 3.0 CPUs. 1163 * 1164 * @np: /ibm,opal/power-mgt device node 1165 * @flags: cpu-idle-state-flags array 1166 * @dt_idle_states: Number of idle state entries 1167 * Returns 0 on success 1168 */ 1169 static void __init pnv_arch300_idle_init(void) 1170 { 1171 u64 max_residency_ns = 0; 1172 int i; 1173 1174 /* stop is not really architected, we only have p9,p10 and p11 drivers */ 1175 if (!pvr_version_is(PVR_POWER9) && !pvr_version_is(PVR_POWER10) && 1176 !pvr_version_is(PVR_POWER11)) 1177 return; 1178 1179 /* 1180 * pnv_deepest_stop_{val,mask} should be set to values corresponding to 1181 * the deepest stop state. 1182 * 1183 * pnv_default_stop_{val,mask} should be set to values corresponding to 1184 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. 1185 */ 1186 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 1187 deep_spr_loss_state = MAX_STOP_STATE + 1; 1188 for (i = 0; i < nr_pnv_idle_states; i++) { 1189 int err; 1190 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1191 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1192 1193 /* No deep loss driver implemented for POWER10 and POWER11 yet */ 1194 if ((pvr_version_is(PVR_POWER10) || pvr_version_is(PVR_POWER11)) && 1195 state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) 1196 continue; 1197 1198 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1199 (pnv_first_tb_loss_level > psscr_rl)) 1200 pnv_first_tb_loss_level = psscr_rl; 1201 1202 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1203 (deep_spr_loss_state > psscr_rl)) 1204 deep_spr_loss_state = psscr_rl; 1205 1206 /* 1207 * The idle code does not deal with TB loss occurring 1208 * in a shallower state than SPR loss, so force it to 1209 * behave like SPRs are lost if TB is lost. POWER9 would 1210 * never encounter this, but a POWER8 core would if it 1211 * implemented the stop instruction. So this is for forward 1212 * compatibility. 1213 */ 1214 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1215 (deep_spr_loss_state > psscr_rl)) 1216 deep_spr_loss_state = psscr_rl; 1217 1218 err = validate_psscr_val_mask(&state->psscr_val, 1219 &state->psscr_mask, 1220 state->flags); 1221 if (err) { 1222 report_invalid_psscr_val(state->psscr_val, err); 1223 continue; 1224 } 1225 1226 state->valid = true; 1227 1228 if (max_residency_ns < state->residency_ns) { 1229 max_residency_ns = state->residency_ns; 1230 pnv_deepest_stop_psscr_val = state->psscr_val; 1231 pnv_deepest_stop_psscr_mask = state->psscr_mask; 1232 pnv_deepest_stop_flag = state->flags; 1233 deepest_stop_found = true; 1234 } 1235 1236 if (!default_stop_found && 1237 (state->flags & OPAL_PM_STOP_INST_FAST)) { 1238 pnv_default_stop_val = state->psscr_val; 1239 pnv_default_stop_mask = state->psscr_mask; 1240 default_stop_found = true; 1241 WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); 1242 } 1243 } 1244 1245 if (unlikely(!default_stop_found)) { 1246 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); 1247 } else { 1248 ppc_md.power_save = arch300_idle; 1249 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 1250 pnv_default_stop_val, pnv_default_stop_mask); 1251 } 1252 1253 if (unlikely(!deepest_stop_found)) { 1254 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait"); 1255 } else { 1256 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", 1257 pnv_deepest_stop_psscr_val, 1258 pnv_deepest_stop_psscr_mask); 1259 } 1260 1261 pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", 1262 deep_spr_loss_state); 1263 1264 pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", 1265 pnv_first_tb_loss_level); 1266 } 1267 1268 static void __init pnv_disable_deep_states(void) 1269 { 1270 /* 1271 * The stop-api is unable to restore hypervisor 1272 * resources on wakeup from platform idle states which 1273 * lose full context. So disable such states. 1274 */ 1275 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; 1276 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); 1277 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); 1278 1279 if (cpu_has_feature(CPU_FTR_ARCH_300) && 1280 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { 1281 /* 1282 * Use the default stop state for CPU-Hotplug 1283 * if available. 1284 */ 1285 if (default_stop_found) { 1286 pnv_deepest_stop_psscr_val = pnv_default_stop_val; 1287 pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; 1288 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", 1289 pnv_deepest_stop_psscr_val); 1290 } else { /* Fallback to snooze loop for CPU-Hotplug */ 1291 deepest_stop_found = false; 1292 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); 1293 } 1294 } 1295 } 1296 1297 /* 1298 * Probe device tree for supported idle states 1299 */ 1300 static void __init pnv_probe_idle_states(void) 1301 { 1302 int i; 1303 1304 if (nr_pnv_idle_states < 0) { 1305 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 1306 return; 1307 } 1308 1309 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1310 pnv_arch300_idle_init(); 1311 1312 for (i = 0; i < nr_pnv_idle_states; i++) 1313 supported_cpuidle_states |= pnv_idle_states[i].flags; 1314 } 1315 1316 /* 1317 * This function parses device-tree and populates all the information 1318 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states 1319 * which is the number of cpuidle states discovered through device-tree. 1320 */ 1321 1322 static int __init pnv_parse_cpuidle_dt(void) 1323 { 1324 struct device_node *np; 1325 int nr_idle_states, i; 1326 int rc = 0; 1327 u32 *temp_u32; 1328 u64 *temp_u64; 1329 const char **temp_string; 1330 1331 np = of_find_node_by_path("/ibm,opal/power-mgt"); 1332 if (!np) { 1333 pr_warn("opal: PowerMgmt Node not found\n"); 1334 return -ENODEV; 1335 } 1336 nr_idle_states = of_property_count_u32_elems(np, 1337 "ibm,cpu-idle-state-flags"); 1338 1339 pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), 1340 GFP_KERNEL); 1341 temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); 1342 temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); 1343 temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); 1344 1345 if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { 1346 pr_err("Could not allocate memory for dt parsing\n"); 1347 rc = -ENOMEM; 1348 goto out; 1349 } 1350 1351 /* Read flags */ 1352 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", 1353 temp_u32, nr_idle_states)) { 1354 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 1355 rc = -EINVAL; 1356 goto out; 1357 } 1358 for (i = 0; i < nr_idle_states; i++) 1359 pnv_idle_states[i].flags = temp_u32[i]; 1360 1361 /* Read latencies */ 1362 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", 1363 temp_u32, nr_idle_states)) { 1364 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 1365 rc = -EINVAL; 1366 goto out; 1367 } 1368 for (i = 0; i < nr_idle_states; i++) 1369 pnv_idle_states[i].latency_ns = temp_u32[i]; 1370 1371 /* Read residencies */ 1372 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", 1373 temp_u32, nr_idle_states)) { 1374 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); 1375 rc = -EINVAL; 1376 goto out; 1377 } 1378 for (i = 0; i < nr_idle_states; i++) 1379 pnv_idle_states[i].residency_ns = temp_u32[i]; 1380 1381 /* For power9 and later */ 1382 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1383 /* Read pm_crtl_val */ 1384 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", 1385 temp_u64, nr_idle_states)) { 1386 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 1387 rc = -EINVAL; 1388 goto out; 1389 } 1390 for (i = 0; i < nr_idle_states; i++) 1391 pnv_idle_states[i].psscr_val = temp_u64[i]; 1392 1393 /* Read pm_crtl_mask */ 1394 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", 1395 temp_u64, nr_idle_states)) { 1396 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 1397 rc = -EINVAL; 1398 goto out; 1399 } 1400 for (i = 0; i < nr_idle_states; i++) 1401 pnv_idle_states[i].psscr_mask = temp_u64[i]; 1402 } 1403 1404 /* 1405 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and 1406 * ibm,cpu-idle-state-pmicr-val were never used and there is no 1407 * plan to use it in near future. Hence, not parsing these properties 1408 */ 1409 1410 if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", 1411 temp_string, nr_idle_states) < 0) { 1412 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); 1413 rc = -EINVAL; 1414 goto out; 1415 } 1416 for (i = 0; i < nr_idle_states; i++) 1417 strscpy(pnv_idle_states[i].name, temp_string[i], 1418 PNV_IDLE_NAME_LEN); 1419 nr_pnv_idle_states = nr_idle_states; 1420 rc = 0; 1421 out: 1422 kfree(temp_u32); 1423 kfree(temp_u64); 1424 kfree(temp_string); 1425 of_node_put(np); 1426 return rc; 1427 } 1428 1429 static int __init pnv_init_idle_states(void) 1430 { 1431 int cpu; 1432 int rc = 0; 1433 1434 /* Set up PACA fields */ 1435 for_each_present_cpu(cpu) { 1436 struct paca_struct *p = paca_ptrs[cpu]; 1437 1438 p->idle_state = 0; 1439 if (cpu == cpu_first_thread_sibling(cpu)) 1440 p->idle_state = (1 << threads_per_core) - 1; 1441 1442 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1443 /* P7/P8 nap */ 1444 p->thread_idle_state = PNV_THREAD_RUNNING; 1445 } else if (pvr_version_is(PVR_POWER9)) { 1446 /* P9 stop workarounds */ 1447 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1448 p->requested_psscr = 0; 1449 atomic_set(&p->dont_stop, 0); 1450 #endif 1451 } 1452 } 1453 1454 /* In case we error out nr_pnv_idle_states will be zero */ 1455 nr_pnv_idle_states = 0; 1456 supported_cpuidle_states = 0; 1457 1458 if (cpuidle_disable != IDLE_NO_OVERRIDE) 1459 goto out; 1460 rc = pnv_parse_cpuidle_dt(); 1461 if (rc) 1462 return rc; 1463 pnv_probe_idle_states(); 1464 1465 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1466 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 1467 power7_fastsleep_workaround_entry = false; 1468 power7_fastsleep_workaround_exit = false; 1469 } else { 1470 struct device *dev_root; 1471 /* 1472 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 1473 * workaround is needed to use fastsleep. Provide sysfs 1474 * control to choose how this workaround has to be 1475 * applied. 1476 */ 1477 dev_root = bus_get_dev_root(&cpu_subsys); 1478 if (dev_root) { 1479 device_create_file(dev_root, 1480 &dev_attr_fastsleep_workaround_applyonce); 1481 put_device(dev_root); 1482 } 1483 } 1484 1485 update_subcore_sibling_mask(); 1486 1487 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { 1488 ppc_md.power_save = power7_idle; 1489 power7_offline_type = PNV_THREAD_NAP; 1490 } 1491 1492 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && 1493 (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) 1494 power7_offline_type = PNV_THREAD_WINKLE; 1495 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || 1496 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 1497 power7_offline_type = PNV_THREAD_SLEEP; 1498 } 1499 1500 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { 1501 if (pnv_save_sprs_for_deep_states()) 1502 pnv_disable_deep_states(); 1503 } 1504 1505 out: 1506 return 0; 1507 } 1508 machine_subsys_initcall(powernv, pnv_init_idle_states); 1509