// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * PowerNV cpuidle code
 *
 * Copyright 2015 IBM Corp.
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/device.h>
#include <linux/cpu.h>

#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/code-patching.h>
#include <asm/smp.h>
#include <asm/runlatch.h>
#include <asm/dbell.h>

#include "powernv.h"
#include "subcore.h"

/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE	0xF

/* Register identifiers passed to opal_slw_set_reg() on ISA 3.0 CPUs. */
#define P9_STOP_SPR_MSR 2000
#define P9_STOP_SPR_PSSCR      855

static u32 supported_cpuidle_states;
struct pnv_idle_states_t *pnv_idle_states;
int nr_pnv_idle_states;

/*
 * The default stop state that will be used by ppc_md.power_save
 * function on platforms that support stop instruction.
 */
static u64 pnv_default_stop_val;
static u64 pnv_default_stop_mask;
static bool default_stop_found;

/*
 * First stop state levels when SPR and TB loss can occur.
 * Both start one past the deepest architected level, i.e. "never".
 */
static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;

/*
 * psscr value and mask of the deepest stop idle state.
 * Used when a cpu is offlined.
 */
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;

static unsigned long power7_offline_type;

/*
 * pnv_save_sprs_for_deep_states - pass per-CPU SPR values to OPAL.
 *
 * For every present CPU this calls opal_slw_set_reg() with:
 *  - HSPRG0 (that CPU's paca pointer) and LPCR, for every thread;
 *  - MSR (MSR_IDLE) and PSSCR (deepest stop value), on ISA 3.0 only;
 *  - HMEER and HID0, for thread 0 of each core only;
 *  - HID1, HID4 and HID5, for thread 0 of each core on pre-ISA 3.0 only.
 *
 * Apart from HSPRG0, all values are read on the calling CPU and reused
 * for every target CPU.
 *
 * NOTE(review): presumably OPAL uses these values to restore the
 * registers when a thread wakes from a state-losing idle state - confirm
 * against the OPAL documentation for opal_slw_set_reg().
 *
 * Returns 0 on success, otherwise the first non-zero value returned by
 * opal_slw_set_reg(); remaining CPUs are not processed in that case.
 */
static int pnv_save_sprs_for_deep_states(void)
{
	int cpu;
	int rc;

	/*
	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
	 * all cpus at boot. Get these reg values of current cpu and use the
	 * same across all cpus.
	 */
	uint64_t lpcr_val	= mfspr(SPRN_LPCR);
	uint64_t hid0_val	= mfspr(SPRN_HID0);
	uint64_t hmeer_val	= mfspr(SPRN_HMEER);
	uint64_t msr_val = MSR_IDLE;
	uint64_t psscr_val = pnv_deepest_stop_psscr_val;

	for_each_present_cpu(cpu) {
		uint64_t pir = get_hard_smp_processor_id(cpu);
		/* HSPRG0 is the one value that differs per CPU: its paca. */
		uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];

		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
		if (rc != 0)
			return rc;

		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
		if (rc != 0)
			return rc;

		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
			if (rc)
				return rc;

			rc = opal_slw_set_reg(pir,
					      P9_STOP_SPR_PSSCR, psscr_val);

			if (rc)
				return rc;
		}

		/* HIDs are per core registers */
		if (cpu_thread_in_core(cpu) == 0) {

			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
			if (rc != 0)
				return rc;

			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
			if (rc != 0)
				return rc;

			/* Only p8 needs to set extra HID registers */
			if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
				uint64_t hid1_val = mfspr(SPRN_HID1);
				uint64_t hid4_val = mfspr(SPRN_HID4);
				uint64_t hid5_val = mfspr(SPRN_HID5);

				rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
				if (rc != 0)
					return rc;

				rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
				if (rc != 0)
					return rc;
			}
		}
	}

	return 0;
}

/*
 * pnv_get_supported_cpuidle_states - return the file-scope
 * supported_cpuidle_states flag word.
 */
u32 pnv_get_supported_cpuidle_states(void)
{
	return supported_cpuidle_states;
}
EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);

/*
 * pnv_fastsleep_workaround_apply - cross-CPU callback that asks OPAL to
 * apply the fastsleep workaround on the CPU it runs on.
 * @info: pointer to an int error flag shared by all callers.
 *
 * The flag is set to 1 when the OPAL call returns non-zero and is never
 * written otherwise, so the caller must clear it before dispatching.
 */
static void pnv_fastsleep_workaround_apply(void *info)
{
	int rc;
	int *err = info;

	rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
					OPAL_CONFIG_IDLE_APPLY);
	if (rc)
		*err = 1;
}

/* Whether the idle entry path applies the fastsleep workaround. */
static bool power7_fastsleep_workaround_entry
= true; 158 static bool power7_fastsleep_workaround_exit = true; 159 160 /* 161 * Used to store fastsleep workaround state 162 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 163 * 1 - Workaround applied once, never undone. 164 */ 165 static u8 fastsleep_workaround_applyonce; 166 167 static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, 168 struct device_attribute *attr, char *buf) 169 { 170 return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); 171 } 172 173 static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, 174 struct device_attribute *attr, const char *buf, 175 size_t count) 176 { 177 cpumask_t primary_thread_mask; 178 int err; 179 u8 val; 180 181 if (kstrtou8(buf, 0, &val) || val != 1) 182 return -EINVAL; 183 184 if (fastsleep_workaround_applyonce == 1) 185 return count; 186 187 /* 188 * fastsleep_workaround_applyonce = 1 implies 189 * fastsleep workaround needs to be left in 'applied' state on all 190 * the cores. Do this by- 191 * 1. Disable the 'undo' workaround in fastsleep exit path 192 * 2. Sendi IPIs to all the cores which have at least one online thread 193 * 3. Disable the 'apply' workaround in fastsleep entry path 194 * 195 * There is no need to send ipi to cores which have all threads 196 * offlined, as last thread of the core entering fastsleep or deeper 197 * state would have applied workaround. 
198 */ 199 power7_fastsleep_workaround_exit = false; 200 201 get_online_cpus(); 202 primary_thread_mask = cpu_online_cores_map(); 203 on_each_cpu_mask(&primary_thread_mask, 204 pnv_fastsleep_workaround_apply, 205 &err, 1); 206 put_online_cpus(); 207 if (err) { 208 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); 209 goto fail; 210 } 211 212 power7_fastsleep_workaround_entry = false; 213 214 fastsleep_workaround_applyonce = 1; 215 216 return count; 217 fail: 218 return -EIO; 219 } 220 221 static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, 222 show_fastsleep_workaround_applyonce, 223 store_fastsleep_workaround_applyonce); 224 225 static inline void atomic_start_thread_idle(void) 226 { 227 int cpu = raw_smp_processor_id(); 228 int first = cpu_first_thread_sibling(cpu); 229 int thread_nr = cpu_thread_in_core(cpu); 230 unsigned long *state = &paca_ptrs[first]->idle_state; 231 232 clear_bit(thread_nr, state); 233 } 234 235 static inline void atomic_stop_thread_idle(void) 236 { 237 int cpu = raw_smp_processor_id(); 238 int first = cpu_first_thread_sibling(cpu); 239 int thread_nr = cpu_thread_in_core(cpu); 240 unsigned long *state = &paca_ptrs[first]->idle_state; 241 242 set_bit(thread_nr, state); 243 } 244 245 static inline void atomic_lock_thread_idle(void) 246 { 247 int cpu = raw_smp_processor_id(); 248 int first = cpu_first_thread_sibling(cpu); 249 unsigned long *state = &paca_ptrs[first]->idle_state; 250 251 while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) 252 barrier(); 253 } 254 255 static inline void atomic_unlock_and_stop_thread_idle(void) 256 { 257 int cpu = raw_smp_processor_id(); 258 int first = cpu_first_thread_sibling(cpu); 259 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 260 unsigned long *state = &paca_ptrs[first]->idle_state; 261 u64 s = READ_ONCE(*state); 262 u64 new, tmp; 263 264 BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); 265 BUG_ON(s & thread); 266 267 again: 268 
new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
	tmp = cmpxchg(state, s, new);
	if (unlikely(tmp != s)) {
		/* The word changed under us: retry from the observed value. */
		s = tmp;
		goto again;
	}
}

/*
 * Drop the core idle lock without touching any thread bit. Calling this
 * without holding the lock is a BUG.
 */
static inline void atomic_unlock_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;

	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
}

/* P7 and P8 */
/* SPR values saved before idling and written back after wakeup. */
struct p7_sprs {
	/* per core */
	u64 tscr;
	u64 worc;

	/* per subcore */
	u64 sdr1;
	u64 rpr;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 wort;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 amor;
	u64 uamor;
};

/*
 * power7_idle_insn - enter a P7/P8 idle state and repair lost state on wake.
 * @type: PNV_THREAD_NAP, PNV_THREAD_WINKLE, or another non-NAP type (the
 *        comments below treat the remaining case as fast sleep).
 *
 * Before idling, for any non-NAP type and under the core idle lock:
 *  - this thread's bit is cleared in the core idle_state word;
 *  - if the entry workaround is enabled and no thread bit remains set,
 *    OPAL is asked to apply the fastsleep workaround;
 *  - for winkle, the SPRs in struct p7_sprs are saved and the winkle
 *    count in idle_state is incremented.
 *
 * After wakeup the amount of restoration depends on the SRR1 wake state
 * and on whether the whole core / subcore had winkled.
 *
 * Returns the SRR1 value produced by isa206_idle_insn_mayloss().
 */
static unsigned long power7_idle_insn(unsigned long type)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	bool full_winkle;
	struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
	bool sprs_saved = false;
	int rc;

	if (unlikely(type != PNV_THREAD_NAP)) {
		atomic_lock_thread_idle();

		/* Our bit must still be set here; clear it under the lock. */
		BUG_ON(!(*state & thread));
		*state &= ~thread;

		if (power7_fastsleep_workaround_entry) {
			/* Only when no thread bit is left set in the core. */
			if ((*state & core_thread_mask) == 0) {
				rc = opal_config_cpu_idle_state(
						OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_APPLY);
				BUG_ON(rc);
			}
		}

		if (type == PNV_THREAD_WINKLE) {
			sprs.tscr	= mfspr(SPRN_TSCR);
			sprs.worc	= mfspr(SPRN_WORC);

			sprs.sdr1	= mfspr(SPRN_SDR1);
			sprs.rpr	= mfspr(SPRN_RPR);

			sprs.lpcr	= mfspr(SPRN_LPCR);
			if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
				sprs.hfscr	= mfspr(SPRN_HFSCR);
				sprs.fscr	= mfspr(SPRN_FSCR);
			}
			sprs.purr	= mfspr(SPRN_PURR);
			sprs.spurr	= mfspr(SPRN_SPURR);
			sprs.dscr	= mfspr(SPRN_DSCR);
			sprs.wort	= mfspr(SPRN_WORT);

			sprs_saved = true;

			/*
			 * Increment winkle counter and set all winkle bits if
			 * all threads are winkling. This allows wakeup side to
			 * distinguish between fast sleep and winkle state
			 * loss. Fast sleep still has to resync the timebase so
			 * this may not be a really big win.
			 */
			*state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
			if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
					>> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
					== threads_per_core)
				*state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
			WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		}

		atomic_unlock_thread_idle();
	}

	/* Saved for every idle type, including nap (see struct comment). */
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		sprs.amr	= mfspr(SPRN_AMR);
		sprs.iamr	= mfspr(SPRN_IAMR);
		sprs.amor	= mfspr(SPRN_AMOR);
		sprs.uamor	= mfspr(SPRN_UAMOR);
	}

	local_paca->thread_idle_state = type;
	srr1 = isa206_idle_insn_mayloss(type);		/* go idle */
	local_paca->thread_idle_state = PNV_THREAD_RUNNING;

	/* A zero SRR1 or translation being on at this point is unexpected. */
	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
			/*
			 * We don't need an isync after the mtsprs here because
			 * the upcoming mtmsrd is execution synchronizing.
			 */
			mtspr(SPRN_AMR,		sprs.amr);
			mtspr(SPRN_IAMR,	sprs.iamr);
			mtspr(SPRN_AMOR,	sprs.amor);
			mtspr(SPRN_UAMOR,	sprs.uamor);
		}
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/* Common case: no hypervisor state lost, only bookkeeping to undo. */
	if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
		if (unlikely(type != PNV_THREAD_NAP)) {
			atomic_lock_thread_idle();
			if (type == PNV_THREAD_WINKLE) {
				WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
				*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
				*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			}
			atomic_unlock_and_stop_thread_idle();
		}
		return srr1;
	}

	/* HV state loss */
	BUG_ON(type == PNV_THREAD_NAP);

	atomic_lock_thread_idle();

	full_winkle = false;
	if (type == PNV_THREAD_WINKLE) {
		WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
		/* Our winkle bit is only set if every thread had winkled. */
		if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
			*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			full_winkle = true;
			BUG_ON(!sprs_saved);
		}
	}

	WARN_ON(*state & thread);

	/* Another thread's bit is already set: skip the per-core work. */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	if (full_winkle) {
		mtspr(SPRN_TSCR,	sprs.tscr);
		mtspr(SPRN_WORC,	sprs.worc);
	}

	if (power7_fastsleep_workaround_exit) {
		rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_UNDO);
		BUG_ON(rc);
	}

	/* TB */
	if (opal_resync_timebase() != OPAL_SUCCESS)
		BUG();

core_woken:
	if (!full_winkle)
		goto subcore_woken;

	/* Same test as above, restricted to this thread's subcore. */
	if ((*state & local_paca->subcore_sibling_mask) != 0)
		goto subcore_woken;

	/* Per-subcore SPRs */
	mtspr(SPRN_SDR1,	sprs.sdr1);
	mtspr(SPRN_RPR,		sprs.rpr);

subcore_woken:
	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();
	atomic_unlock_and_stop_thread_idle();

	/* Fast sleep does not lose SPRs */
	if (!full_winkle)
		return srr1;

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR,	sprs.lpcr);
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		mtspr(SPRN_HFSCR,	sprs.hfscr);
		mtspr(SPRN_FSCR,	sprs.fscr);
	}
	mtspr(SPRN_PURR,	sprs.purr);
	mtspr(SPRN_SPURR,	sprs.spurr);
	mtspr(SPRN_DSCR,	sprs.dscr);
	mtspr(SPRN_WORT,	sprs.wort);

	/* SPRG3 is reloaded from the paca rather than from a saved copy. */
	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);

	/*
	 * The SLB has to be restored here, but it sometimes still
	 * contains entries, so the __ variant must be used to prevent
	 * multi hits.
	 */
	__slb_restore_bolted_realmode();

	return srr1;
}

extern unsigned long idle_kvm_start_guest(unsigned long srr1);

#ifdef CONFIG_HOTPLUG_CPU
/*
 * power7_offline - idle an offlined P7/P8 thread using power7_offline_type.
 *
 * Switches the MSR to MSR_IDLE around the idle instruction and, when KVM
 * HV is possible, publishes the thread's idle/kernel state in the paca
 * and enters the guest if one was requested while idle.
 *
 * Returns SRR1 from the wakeup (possibly replaced by the value returned
 * from idle_kvm_start_guest()).
 */
static unsigned long power7_offline(void)
{
	unsigned long srr1;

	mtmsr(MSR_IDLE);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	/* Tell KVM we're entering idle. */
	/******************************************************/
	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
	/* MUST occur in real mode, i.e. with the MMU off,    */
	/* and the MMU must stay off until we clear this flag */
	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
	/* pnv_powersave_wakeup in this file.                 */
	/* The reason is that another thread can switch the   */
	/* MMU to a guest context whenever this flag is set   */
	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
	/* that would potentially cause this thread to start  */
	/* executing instructions from guest memory in        */
	/* hypervisor mode, leading to a host crash or data   */
	/* corruption, or worse.                              
*/
	/******************************************************/
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
#endif

	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(power7_offline_type);
	__ppc64_runlatch_on();

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
#endif

	/* Only now is it safe to switch back to the normal kernel MSR. */
	mtmsr(MSR_KERNEL);

	return srr1;
}
#endif

/*
 * power7_idle_type - enter the given P7/P8 idle state from the idle loop.
 * @type: idle type handed to power7_idle_insn().
 *
 * Returns without idling when prep_irq_for_idle_irqsoff() returns false.
 * Otherwise idles with MSR_IDLE and the runlatch off, then hands the
 * wakeup SRR1 to irq_set_pending_from_srr1().
 */
void power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	mtmsr(MSR_IDLE);
	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(type);
	__ppc64_runlatch_on();
	mtmsr(MSR_KERNEL);

	fini_irq_for_idle_irqsoff();
	irq_set_pending_from_srr1(srr1);
}

/*
 * power7_idle - parameterless nap entry point; does nothing when
 * powersave_nap is zero.
 */
static void power7_idle(void)
{
	if (!powersave_nap)
		return;

	power7_idle_type(PNV_THREAD_NAP);
}

/* SPR values saved before a POWER9 stop and written back after wakeup. */
struct p9_sprs {
	/* per core */
	u64 ptcr;
	u64 rpr;
	u64 tscr;
	u64 ldbar;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 pid;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 wort;
	u64 ciabr;

	u64 mmcra;
	u32 mmcr0;
	u32 mmcr1;
	u64 mmcr2;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 amor;
	u64 uamor;
};

/*
 * power9_idle_stop - execute stop on POWER9 and repair lost state on wake.
 * @psscr: PSSCR value to stop with.
 * @mmu_on: when true, MSR_KERNEL is restored with mtmsr before returning.
 *          Must be true for the EC=ESL=0 case (enforced with BUG_ON).
 *
 * With EC=ESL=0 nothing is saved and the stop is expected to lose no
 * state. Otherwise the shallow-state SPRs are always saved, and the full
 * struct p9_sprs set is saved when the requested level is at or beyond
 * deep_spr_loss_state. After wakeup, PSSCR[PLS] decides how much is
 * restored.
 *
 * Returns the SRR1 wakeup value. Returns 0 when the no-loss stop returned
 * 0, or when another thread has asked this one not to stop (dont_stop).
 */
static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	unsigned long pls;
	unsigned long mmcr0 = 0;
	unsigned long mmcra = 0;
	struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
	bool sprs_saved = false;

	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
		/* EC=ESL=0 case */

		BUG_ON(!mmu_on);

		/*
		 * Wake synchronously. SRESET via xscom may still cause
		 * a 0x100 powersave wakeup with SRR1 reason!
		 */
		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
		if (likely(!srr1))
			return 0;

		/*
		 * Registers not saved, can't recover!
		 * This would be a hardware bug
		 */
		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);

		goto out;
	}

	/* EC=ESL=1 case */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
		/* Publish the request, then check whether we may stop. */
		local_paca->requested_psscr = psscr;
		/* order setting requested_psscr vs testing dont_stop */
		smp_mb();
		if (atomic_read(&local_paca->dont_stop)) {
			local_paca->requested_psscr = 0;
			return 0;
		}
	}
#endif

	if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
		 /*
		  * POWER9 DD2 can incorrectly set PMAO when waking up
		  * after a state-loss idle. Saving and restoring MMCR0
		  * over idle is a workaround.
		  */
		mmcr0		= mfspr(SPRN_MMCR0);
	}

	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
		sprs.lpcr	= mfspr(SPRN_LPCR);
		sprs.hfscr	= mfspr(SPRN_HFSCR);
		sprs.fscr	= mfspr(SPRN_FSCR);
		sprs.pid	= mfspr(SPRN_PID);
		sprs.purr	= mfspr(SPRN_PURR);
		sprs.spurr	= mfspr(SPRN_SPURR);
		sprs.dscr	= mfspr(SPRN_DSCR);
		sprs.wort	= mfspr(SPRN_WORT);
		sprs.ciabr	= mfspr(SPRN_CIABR);

		sprs.mmcra	= mfspr(SPRN_MMCRA);
		sprs.mmcr0	= mfspr(SPRN_MMCR0);
		sprs.mmcr1	= mfspr(SPRN_MMCR1);
		sprs.mmcr2	= mfspr(SPRN_MMCR2);

		sprs.ptcr	= mfspr(SPRN_PTCR);
		sprs.rpr	= mfspr(SPRN_RPR);
		sprs.tscr	= mfspr(SPRN_TSCR);
		/* LDBAR is neither saved nor restored under an ultravisor. */
		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
			sprs.ldbar = mfspr(SPRN_LDBAR);

		sprs_saved = true;

		/* Clear our bit in the core idle_state word. */
		atomic_start_thread_idle();
	}

	sprs.amr	= mfspr(SPRN_AMR);
	sprs.iamr	= mfspr(SPRN_IAMR);
	sprs.amor	= mfspr(SPRN_AMOR);
	sprs.uamor	= mfspr(SPRN_UAMOR);

	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->requested_psscr = 0;
#endif

	/* Re-read PSSCR: its PLS field is examined further down. */
	psscr = mfspr(SPRN_PSSCR);

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
		/*
		 * We don't need an isync after the mtsprs here because the
		 * upcoming mtmsrd is execution synchronizing.
		 */
		mtspr(SPRN_AMR,		sprs.amr);
		mtspr(SPRN_IAMR,	sprs.iamr);
		mtspr(SPRN_AMOR,	sprs.amor);
		mtspr(SPRN_UAMOR,	sprs.uamor);

		/*
		 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
		 * might have been corrupted and needs flushing. We also need
		 * to reload MMCR0 (see mmcr0 comment above).
		 */
		if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
			mtspr(SPRN_MMCR0, mmcr0);
		}

		/*
		 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
		 * to ensure the PMU starts running.
		 */
		mmcra = mfspr(SPRN_MMCRA);
		mmcra |= PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
		mmcra &= ~PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/*
	 * On POWER9, SRR1 bits do not match exactly as expected.
	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
	 * just always test PSSCR for SPR/TB state loss.
	 */
	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
	if (likely(pls < deep_spr_loss_state)) {
		/* Undo atomic_start_thread_idle() if we did it above. */
		if (sprs_saved)
			atomic_stop_thread_idle();
		goto out;
	}

	/* HV state loss */
	BUG_ON(!sprs_saved);

	atomic_lock_thread_idle();

	/* Another thread's bit is already set: skip the per-core work. */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	mtspr(SPRN_PTCR,	sprs.ptcr);
	mtspr(SPRN_RPR,		sprs.rpr);
	mtspr(SPRN_TSCR,	sprs.tscr);

	if (pls >= pnv_first_tb_loss_level) {
		/* TB loss */
		if (opal_resync_timebase() != OPAL_SUCCESS)
			BUG();
	}

	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();

core_woken:
	atomic_unlock_and_stop_thread_idle();

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR,	sprs.lpcr);
	mtspr(SPRN_HFSCR,	sprs.hfscr);
	mtspr(SPRN_FSCR,	sprs.fscr);
	mtspr(SPRN_PID,		sprs.pid);
	mtspr(SPRN_PURR,	sprs.purr);
	mtspr(SPRN_SPURR,	sprs.spurr);
	mtspr(SPRN_DSCR,	sprs.dscr);
	mtspr(SPRN_WORT,	sprs.wort);
	mtspr(SPRN_CIABR,	sprs.ciabr);

	mtspr(SPRN_MMCRA,	sprs.mmcra);
	mtspr(SPRN_MMCR0,	sprs.mmcr0);
	mtspr(SPRN_MMCR1,	sprs.mmcr1);
	mtspr(SPRN_MMCR2,	sprs.mmcr2);
	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
		mtspr(SPRN_LDBAR, sprs.ldbar);

	/* SPRG3 is reloaded from the paca rather than from a saved copy. */
	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);

	/* The bolted SLB entries are only restored when not using radix. */
	if (!radix_enabled())
		__slb_restore_bolted_realmode();

out:
	if (mmu_on)
		mtmsr(MSR_KERNEL);

	return srr1;
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 in order by asking
 * all other threads not to stop, and sending a message to any
 * that are in a stop state.
 * Must be called with preemption disabled.
*/
void pnv_power9_force_smt4_catch(void)
{
	int cpu, cpu0, thr;
	int awake_threads = 1;		/* this thread is awake */
	int poke_threads = 0;
	int need_awake = threads_per_core;

	cpu = smp_processor_id();
	/* cpu0: first thread of this core (mask off the thread index). */
	cpu0 = cpu & ~(threads_per_core - 1);
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
	}
	/* order setting dont_stop vs testing requested_psscr */
	smp_mb();
	/*
	 * Classify every thread of the core: a zero requested_psscr counts
	 * as awake, anything else is remembered in the poke_threads bitmap.
	 * The loop covers all threads, the calling one included.
	 */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (!paca_ptrs[cpu0+thr]->requested_psscr)
			++awake_threads;
		else
			poke_threads |= (1 << thr);
	}

	/* If at least 3 threads are awake, the core is in SMT4 already */
	if (awake_threads < need_awake) {
		/* We have to wake some threads; we'll use msgsnd */
		for (thr = 0; thr < threads_per_core; ++thr) {
			if (poke_threads & (1 << thr)) {
				ppc_msgsnd_sync();
				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
					   paca_ptrs[cpu0+thr]->hw_cpu_id);
			}
		}
		/* now spin until at least 3 threads are awake */
		do {
			for (thr = 0; thr < threads_per_core; ++thr) {
				/* Count each poked thread once, when it clears. */
				if ((poke_threads & (1 << thr)) &&
				    !paca_ptrs[cpu0+thr]->requested_psscr) {
					++awake_threads;
					poke_threads &= ~(1 << thr);
				}
			}
		} while (awake_threads < need_awake);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);

/*
 * pnv_power9_force_smt4_release - drop the dont_stop reference that
 * pnv_power9_force_smt4_catch() took on every other thread of this core.
 */
void pnv_power9_force_smt4_release(void)
{
	int cpu, cpu0, thr;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);

	/* clear all the dont_stop flags */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

/* Intentionally empty: no POWER10 SPRs are saved by this file yet. */
struct p10_sprs {
	/*
	 * SPRs that get lost in shallow states:
	 *
	 * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
	 * isa300 idle routines restore CR, LR.
	 * CTR is volatile
	 * idle thread doesn't use FP or VEC
	 * kernel doesn't use TAR
	 * HSPRG1 is only live in HV interrupt entry
	 * SPRG2 is only live in KVM guests, KVM handles it.
	 */
};

/*
 * power10_idle_stop - execute stop on POWER10.
 * @psscr: PSSCR value to stop with.
 * @mmu_on: when true, MSR_KERNEL is restored with mtmsr before returning.
 *          Must be true for the EC=ESL=0 case (enforced with BUG_ON).
 *
 * Same overall shape as power9_idle_stop(), but no SPRs are saved or
 * restored here; the deep-state save/restore points are marked XXX.
 *
 * Returns the SRR1 wakeup value, or 0 when the no-loss stop returned 0.
 */
static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	unsigned long pls;
//	struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
	bool sprs_saved = false;

	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
		/* EC=ESL=0 case */

		BUG_ON(!mmu_on);

		/*
		 * Wake synchronously. SRESET via xscom may still cause
		 * a 0x100 powersave wakeup with SRR1 reason!
		 */
		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
		if (likely(!srr1))
			return 0;

		/*
		 * Registers not saved, can't recover!
		 * This would be a hardware bug
		 */
		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);

		goto out;
	}

	/* EC=ESL=1 case */
	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
		/* XXX: save SPRs for deep state loss here. */

		sprs_saved = true;

		/* Clear our bit in the core idle_state word. */
		atomic_start_thread_idle();
	}

	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */

	/* Re-read PSSCR: its PLS field is examined further down. */
	psscr = mfspr(SPRN_PSSCR);

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/*
	 * On POWER10, SRR1 bits do not match exactly as expected.
	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
	 * just always test PSSCR for SPR/TB state loss.
	 */
	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
	if (likely(pls < deep_spr_loss_state)) {
		/* Undo atomic_start_thread_idle() if we did it above. */
		if (sprs_saved)
			atomic_stop_thread_idle();
		goto out;
	}

	/* HV state loss */
	BUG_ON(!sprs_saved);

	atomic_lock_thread_idle();

	/* Another thread's bit is already set: skip the per-core work. */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* XXX: restore per-core SPRs here */

	if (pls >= pnv_first_tb_loss_level) {
		/* TB loss */
		if (opal_resync_timebase() != OPAL_SUCCESS)
			BUG();
	}

	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();

core_woken:
	atomic_unlock_and_stop_thread_idle();

	/* XXX: restore per-thread SPRs here */

	if (!radix_enabled())
		__slb_restore_bolted_realmode();

out:
	if (mmu_on)
		mtmsr(MSR_KERNEL);

	return srr1;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * arch300_offline_stop - stop an offlined ISA 3.0+ thread.
 * @psscr: PSSCR value to stop with.
 *
 * Dispatches to power10_idle_stop() when CPU_FTR_ARCH_31 is present and
 * to power9_idle_stop() otherwise, with the runlatch off around the call.
 * Returns the SRR1 wakeup value.
 */
static unsigned long arch300_offline_stop(unsigned long psscr)
{
	unsigned long srr1;

#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	__ppc64_runlatch_off();
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr, true);
	else
		srr1 = power9_idle_stop(psscr, true);
	__ppc64_runlatch_on();
#else
	/*
	 * Tell KVM we're entering idle.
	 * This does not have to be done in real mode because the P9 MMU
	 * is independent per-thread. Some steppings share radix/hash mode
	 * between threads, but in that case KVM has a barrier sync in real
	 * mode before and after switching between radix and hash.
	 *
	 * kvm_start_guest must still be called in real mode though, hence
	 * the false argument.
*/
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;

	__ppc64_runlatch_off();
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr, false);
	else
		srr1 = power9_idle_stop(psscr, false);
	__ppc64_runlatch_on();

	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
	/* mmu_on was false above, so the kernel MSR is restored here. */
	mtmsr(MSR_KERNEL);
#endif

	return srr1;
}
#endif

/*
 * arch300_idle_type - enter a stop state from the idle loop.
 * @stop_psscr_val: PSSCR bits to set.
 * @stop_psscr_mask: PSSCR bits replaced by @stop_psscr_val; all other
 *                   bits keep their current hardware value.
 *
 * Returns without idling when prep_irq_for_idle_irqsoff() returns false.
 * Otherwise stops with the runlatch off and hands the wakeup SRR1 to
 * irq_set_pending_from_srr1().
 */
void arch300_idle_type(unsigned long stop_psscr_val,
				      unsigned long stop_psscr_mask)
{
	unsigned long psscr;
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	psscr = mfspr(SPRN_PSSCR);
	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

	__ppc64_runlatch_off();
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr, true);
	else
		srr1 = power9_idle_stop(psscr, true);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	irq_set_pending_from_srr1(srr1);
}

/*
 * Used for ppc_md.power_save which needs a function with no parameters
 */
static void arch300_idle(void)
{
	arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * pnv_program_cpu_hotplug_lpcr - set LPCR for a CPU going through hotplug.
 * @cpu: logical CPU number, used only to look up the hardware id passed
 *       to OPAL.
 * @lpcr_val: value to program.
 *
 * The mtspr writes the LPCR of the CPU executing this function. The
 * return value of opal_slw_set_reg() is not checked.
 */
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
	u64 pir = get_hard_smp_processor_id(cpu);

	mtspr(SPRN_LPCR, lpcr_val);

	/*
	 * Program the LPCR via stop-api only if the deepest stop state
	 * can lose hypervisor context.
	 */
	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
}

/*
 * pnv_cpu_offline: A function that puts the CPU into the deepest
 * available platform idle state on a CPU-Offline.
 * interrupts hard disabled and no lazy irq pending.
 *
 * Selection order:
 *  1. ISA 3.0 with a deepest stop state found: arch300_offline_stop().
 *  2. ISA 2.06 with a non-zero power7_offline_type: power7_offline().
 *  3. Otherwise spin at low thread priority until
 *     generic_check_cpu_restart(@cpu) returns true.
 *
 * Returns the SRR1 wakeup value, or 0 for the spin fallback.
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
	unsigned long srr1;

	__ppc64_runlatch_off();

	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
		unsigned long psscr;

		/* Merge the deepest-stop bits into the current PSSCR. */
		psscr = mfspr(SPRN_PSSCR);
		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
						pnv_deepest_stop_psscr_val;
		srr1 = arch300_offline_stop(psscr);
	} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
		srr1 = power7_offline();
	} else {
		/* This is the fallback method. We emulate snooze */
		while (!generic_check_cpu_restart(cpu)) {
			HMT_low();
			HMT_very_low();
		}
		srr1 = 0;
		HMT_medium();
	}

	__ppc64_runlatch_on();

	return srr1;
}
#endif

/*
 * Power ISA 3.0 idle initialization.
 *
 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 * Register (PSSCR) to control idle behavior.
 *
 * PSSCR layout:
 * ----------------------------------------------------------
 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 * ----------------------------------------------------------
 * 0      4     41   42    43   44     48    54   56    60
 *
 * PSSCR key fields:
 *	Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 *	lowest power-saving state the thread entered since stop instruction was
 *	last executed.
 *
 *	Bit 41 - Status Disable(SD)
 *	0 - Shows PLS entries
 *	1 - PLS entries are all 0
 *
 *	Bit 42 - Enable State Loss
 *	0 - No state is lost irrespective of other fields
 *	1 - Allows state loss
 *
 *	Bit 43 - Exit Criterion
 *	0 - Exit from power-save mode on any interrupt
 *	1 - Exit from power-save mode controlled by LPCR's PECE bits
 *
 *	Bits 44:47 - Power-Saving Level Limit
 *	This limits the power-saving level that can be entered into.
*
 *	Bits 60:63 - Requested Level
 *	Used to specify which power-saving level must be entered on executing
 *	stop instruction
 */

/*
 * validate_psscr_val_mask - normalise or sanity-check one stop state.
 * @psscr_val: in/out PSSCR value reported for the state.
 * @psscr_mask: in/out PSSCR mask reported for the state.
 * @flags: the state's flags; only OPAL_PM_LOSE_FULL_CONTEXT is examined.
 *
 * A mask of exactly 0xf is treated as old firmware: the value gets
 * PSSCR_HV_DEFAULT_VAL OR-ed in, the mask becomes PSSCR_HV_DEFAULT_MASK,
 * and no further checks are made. Any other mask leaves both untouched
 * and only validates the ESL/EC bits.
 *
 * Returns 0 when acceptable, ERR_EC_ESL_MISMATCH when ESL and EC differ,
 * or ERR_DEEP_STATE_ESL_MISMATCH when a full-context-loss state has ESL
 * clear.
 */
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
{
	int err = 0;

	/*
	 * psscr_mask == 0xf indicates an older firmware.
	 * Set remaining fields of psscr to the default values.
	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
	 */
	if (*psscr_mask == 0xf) {
		*psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
		return err;
	}

	/*
	 * New firmware is expected to set the psscr_val bits correctly.
	 * Validate that the following invariants are correctly maintained by
	 * the new firmware.
	 * - ESL bit value matches the EC bit value.
	 * - ESL bit is set for all the deep stop states.
	 */
	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
		err = ERR_EC_ESL_MISMATCH;
	} else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		GET_PSSCR_ESL(*psscr_val) == 0) {
		err = ERR_DEEP_STATE_ESL_MISMATCH;
	}

	return err;
}

/*
 * pnv_arch300_idle_init: Initializes the default idle state, first
 *			  deep idle state and deepest idle state on
 *			  ISA 3.0 CPUs.
 *
 * Takes no arguments and returns nothing; it works on the file-scope
 * pnv_idle_states[] table and does nothing unless the PVR identifies a
 * POWER9 or POWER10.
 */
static void __init pnv_arch300_idle_init(void)
{
	u64 max_residency_ns = 0;
	int i;

	/* stop is not really architected, we only have p9,p10 drivers */
	if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
		return;

	/*
	 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
	 * the deepest stop state.
1218 * 1219 * pnv_default_stop_{val,mask} should be set to values corresponding to 1220 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. 1221 */ 1222 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 1223 deep_spr_loss_state = MAX_STOP_STATE + 1; 1224 for (i = 0; i < nr_pnv_idle_states; i++) { 1225 int err; 1226 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1227 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1228 1229 /* No deep loss driver implemented for POWER10 yet */ 1230 if (pvr_version_is(PVR_POWER10) && 1231 state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) 1232 continue; 1233 1234 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1235 (pnv_first_tb_loss_level > psscr_rl)) 1236 pnv_first_tb_loss_level = psscr_rl; 1237 1238 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1239 (deep_spr_loss_state > psscr_rl)) 1240 deep_spr_loss_state = psscr_rl; 1241 1242 /* 1243 * The idle code does not deal with TB loss occurring 1244 * in a shallower state than SPR loss, so force it to 1245 * behave like SPRs are lost if TB is lost. POWER9 would 1246 * never encouter this, but a POWER8 core would if it 1247 * implemented the stop instruction. So this is for forward 1248 * compatibility. 
1249 */ 1250 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1251 (deep_spr_loss_state > psscr_rl)) 1252 deep_spr_loss_state = psscr_rl; 1253 1254 err = validate_psscr_val_mask(&state->psscr_val, 1255 &state->psscr_mask, 1256 state->flags); 1257 if (err) { 1258 report_invalid_psscr_val(state->psscr_val, err); 1259 continue; 1260 } 1261 1262 state->valid = true; 1263 1264 if (max_residency_ns < state->residency_ns) { 1265 max_residency_ns = state->residency_ns; 1266 pnv_deepest_stop_psscr_val = state->psscr_val; 1267 pnv_deepest_stop_psscr_mask = state->psscr_mask; 1268 pnv_deepest_stop_flag = state->flags; 1269 deepest_stop_found = true; 1270 } 1271 1272 if (!default_stop_found && 1273 (state->flags & OPAL_PM_STOP_INST_FAST)) { 1274 pnv_default_stop_val = state->psscr_val; 1275 pnv_default_stop_mask = state->psscr_mask; 1276 default_stop_found = true; 1277 WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); 1278 } 1279 } 1280 1281 if (unlikely(!default_stop_found)) { 1282 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); 1283 } else { 1284 ppc_md.power_save = arch300_idle; 1285 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 1286 pnv_default_stop_val, pnv_default_stop_mask); 1287 } 1288 1289 if (unlikely(!deepest_stop_found)) { 1290 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. 
Offlined CPUs will busy wait"); 1291 } else { 1292 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", 1293 pnv_deepest_stop_psscr_val, 1294 pnv_deepest_stop_psscr_mask); 1295 } 1296 1297 pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", 1298 deep_spr_loss_state); 1299 1300 pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", 1301 pnv_first_tb_loss_level); 1302 } 1303 1304 static void __init pnv_disable_deep_states(void) 1305 { 1306 /* 1307 * The stop-api is unable to restore hypervisor 1308 * resources on wakeup from platform idle states which 1309 * lose full context. So disable such states. 1310 */ 1311 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; 1312 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); 1313 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); 1314 1315 if (cpu_has_feature(CPU_FTR_ARCH_300) && 1316 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { 1317 /* 1318 * Use the default stop state for CPU-Hotplug 1319 * if available. 
1320 */ 1321 if (default_stop_found) { 1322 pnv_deepest_stop_psscr_val = pnv_default_stop_val; 1323 pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; 1324 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", 1325 pnv_deepest_stop_psscr_val); 1326 } else { /* Fallback to snooze loop for CPU-Hotplug */ 1327 deepest_stop_found = false; 1328 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); 1329 } 1330 } 1331 } 1332 1333 /* 1334 * Probe device tree for supported idle states 1335 */ 1336 static void __init pnv_probe_idle_states(void) 1337 { 1338 int i; 1339 1340 if (nr_pnv_idle_states < 0) { 1341 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 1342 return; 1343 } 1344 1345 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1346 pnv_arch300_idle_init(); 1347 1348 for (i = 0; i < nr_pnv_idle_states; i++) 1349 supported_cpuidle_states |= pnv_idle_states[i].flags; 1350 } 1351 1352 /* 1353 * This function parses device-tree and populates all the information 1354 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states 1355 * which is the number of cpuidle states discovered through device-tree. 
1356 */ 1357 1358 static int pnv_parse_cpuidle_dt(void) 1359 { 1360 struct device_node *np; 1361 int nr_idle_states, i; 1362 int rc = 0; 1363 u32 *temp_u32; 1364 u64 *temp_u64; 1365 const char **temp_string; 1366 1367 np = of_find_node_by_path("/ibm,opal/power-mgt"); 1368 if (!np) { 1369 pr_warn("opal: PowerMgmt Node not found\n"); 1370 return -ENODEV; 1371 } 1372 nr_idle_states = of_property_count_u32_elems(np, 1373 "ibm,cpu-idle-state-flags"); 1374 1375 pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), 1376 GFP_KERNEL); 1377 temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); 1378 temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); 1379 temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); 1380 1381 if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { 1382 pr_err("Could not allocate memory for dt parsing\n"); 1383 rc = -ENOMEM; 1384 goto out; 1385 } 1386 1387 /* Read flags */ 1388 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", 1389 temp_u32, nr_idle_states)) { 1390 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 1391 rc = -EINVAL; 1392 goto out; 1393 } 1394 for (i = 0; i < nr_idle_states; i++) 1395 pnv_idle_states[i].flags = temp_u32[i]; 1396 1397 /* Read latencies */ 1398 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", 1399 temp_u32, nr_idle_states)) { 1400 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 1401 rc = -EINVAL; 1402 goto out; 1403 } 1404 for (i = 0; i < nr_idle_states; i++) 1405 pnv_idle_states[i].latency_ns = temp_u32[i]; 1406 1407 /* Read residencies */ 1408 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", 1409 temp_u32, nr_idle_states)) { 1410 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); 1411 rc = -EINVAL; 1412 goto out; 1413 } 1414 for (i = 0; i < nr_idle_states; i++) 1415 pnv_idle_states[i].residency_ns = temp_u32[i]; 1416 1417 /* For power9 and 
later */ 1418 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1419 /* Read pm_crtl_val */ 1420 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", 1421 temp_u64, nr_idle_states)) { 1422 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 1423 rc = -EINVAL; 1424 goto out; 1425 } 1426 for (i = 0; i < nr_idle_states; i++) 1427 pnv_idle_states[i].psscr_val = temp_u64[i]; 1428 1429 /* Read pm_crtl_mask */ 1430 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", 1431 temp_u64, nr_idle_states)) { 1432 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 1433 rc = -EINVAL; 1434 goto out; 1435 } 1436 for (i = 0; i < nr_idle_states; i++) 1437 pnv_idle_states[i].psscr_mask = temp_u64[i]; 1438 } 1439 1440 /* 1441 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and 1442 * ibm,cpu-idle-state-pmicr-val were never used and there is no 1443 * plan to use it in near future. Hence, not parsing these properties 1444 */ 1445 1446 if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", 1447 temp_string, nr_idle_states) < 0) { 1448 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); 1449 rc = -EINVAL; 1450 goto out; 1451 } 1452 for (i = 0; i < nr_idle_states; i++) 1453 strlcpy(pnv_idle_states[i].name, temp_string[i], 1454 PNV_IDLE_NAME_LEN); 1455 nr_pnv_idle_states = nr_idle_states; 1456 rc = 0; 1457 out: 1458 kfree(temp_u32); 1459 kfree(temp_u64); 1460 kfree(temp_string); 1461 return rc; 1462 } 1463 1464 static int __init pnv_init_idle_states(void) 1465 { 1466 int cpu; 1467 int rc = 0; 1468 1469 /* Set up PACA fields */ 1470 for_each_present_cpu(cpu) { 1471 struct paca_struct *p = paca_ptrs[cpu]; 1472 1473 p->idle_state = 0; 1474 if (cpu == cpu_first_thread_sibling(cpu)) 1475 p->idle_state = (1 << threads_per_core) - 1; 1476 1477 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1478 /* P7/P8 nap */ 1479 p->thread_idle_state = PNV_THREAD_RUNNING; 1480 } else if (pvr_version_is(PVR_POWER9)) { 1481 
/* P9 stop workarounds */ 1482 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1483 p->requested_psscr = 0; 1484 atomic_set(&p->dont_stop, 0); 1485 #endif 1486 } 1487 } 1488 1489 /* In case we error out nr_pnv_idle_states will be zero */ 1490 nr_pnv_idle_states = 0; 1491 supported_cpuidle_states = 0; 1492 1493 if (cpuidle_disable != IDLE_NO_OVERRIDE) 1494 goto out; 1495 rc = pnv_parse_cpuidle_dt(); 1496 if (rc) 1497 return rc; 1498 pnv_probe_idle_states(); 1499 1500 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1501 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 1502 power7_fastsleep_workaround_entry = false; 1503 power7_fastsleep_workaround_exit = false; 1504 } else { 1505 /* 1506 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 1507 * workaround is needed to use fastsleep. Provide sysfs 1508 * control to choose how this workaround has to be 1509 * applied. 1510 */ 1511 device_create_file(cpu_subsys.dev_root, 1512 &dev_attr_fastsleep_workaround_applyonce); 1513 } 1514 1515 update_subcore_sibling_mask(); 1516 1517 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { 1518 ppc_md.power_save = power7_idle; 1519 power7_offline_type = PNV_THREAD_NAP; 1520 } 1521 1522 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && 1523 (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) 1524 power7_offline_type = PNV_THREAD_WINKLE; 1525 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || 1526 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 1527 power7_offline_type = PNV_THREAD_SLEEP; 1528 } 1529 1530 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { 1531 if (pnv_save_sprs_for_deep_states()) 1532 pnv_disable_deep_states(); 1533 } 1534 1535 out: 1536 return 0; 1537 } 1538 machine_subsys_initcall(powernv, pnv_init_idle_states); 1539