1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/x86_archext.h> 27 #include <sys/machsystm.h> 28 #include <sys/x_call.h> 29 #include <sys/stat.h> 30 #include <sys/acpi/acpi.h> 31 #include <sys/acpica.h> 32 #include <sys/cpu_acpi.h> 33 #include <sys/cpu_idle.h> 34 #include <sys/cpupm.h> 35 #include <sys/hpet.h> 36 #include <sys/archsystm.h> 37 #include <vm/hat_i86.h> 38 #include <sys/dtrace.h> 39 #include <sys/sdt.h> 40 #include <sys/callb.h> 41 42 extern void cpu_idle_adaptive(void); 43 extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data, 44 cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start); 45 46 static int cpu_idle_init(cpu_t *); 47 static void cpu_idle_fini(cpu_t *); 48 static boolean_t cpu_deep_idle_callb(void *arg, int code); 49 static boolean_t cpu_idle_cpr_callb(void *arg, int code); 50 static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate); 51 52 /* 53 * Interfaces for modules implementing Intel's deep c-state. 54 */ 55 cpupm_state_ops_t cpu_idle_ops = { 56 "Generic ACPI C-state Support", 57 cpu_idle_init, 58 cpu_idle_fini, 59 NULL 60 }; 61 62 static kmutex_t cpu_idle_callb_mutex; 63 static callb_id_t cpu_deep_idle_callb_id; 64 static callb_id_t cpu_idle_cpr_callb_id; 65 static uint_t cpu_idle_cfg_state; 66 67 static kmutex_t cpu_idle_mutex; 68 69 cpu_idle_kstat_t cpu_idle_kstat = { 70 { "address_space_id", KSTAT_DATA_STRING }, 71 { "latency", KSTAT_DATA_UINT32 }, 72 { "power", KSTAT_DATA_UINT32 }, 73 }; 74 75 /* 76 * kstat update function of the c-state info 77 */ 78 static int 79 cpu_idle_kstat_update(kstat_t *ksp, int flag) 80 { 81 cpu_acpi_cstate_t *cstate = ksp->ks_private; 82 83 if (flag == KSTAT_WRITE) { 84 return (EACCES); 85 } 86 87 if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { 88 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 89 "FFixedHW"); 90 } else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) { 91 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 92 "SystemIO"); 93 } else { 94 kstat_named_setstr(&cpu_idle_kstat.addr_space_id, 95 "Unsupported"); 96 } 97 98 cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency; 99 cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power; 100 101 return (0); 102 } 103 104 /* 105 * c-state wakeup function. 106 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals 107 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State. 108 */ 109 void 110 cstate_wakeup(cpu_t *cp, int bound) 111 { 112 struct machcpu *mcpu = &(cp->cpu_m); 113 volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait; 114 cpupart_t *cpu_part; 115 uint_t cpu_found; 116 processorid_t cpu_sid; 117 118 cpu_part = cp->cpu_part; 119 cpu_sid = cp->cpu_seqid; 120 /* 121 * Clear the halted bit for that CPU since it will be woken up 122 * in a moment. 123 */ 124 if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) { 125 /* 126 * Clear the halted bit for that CPU since it will be 127 * poked in a moment. 128 */ 129 bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid); 130 131 /* 132 * We may find the current CPU present in the halted cpuset 133 * if we're in the context of an interrupt that occurred 134 * before we had a chance to clear our bit in cpu_idle(). 135 * Waking ourself is obviously unnecessary, since if 136 * we're here, we're not halted. 137 */ 138 if (cp != CPU) { 139 /* 140 * Use correct wakeup mechanism 141 */ 142 if ((mcpu_mwait != NULL) && 143 (*mcpu_mwait == MWAIT_HALTED)) 144 MWAIT_WAKEUP(cp); 145 else 146 poke_cpu(cp->cpu_id); 147 } 148 return; 149 } else { 150 /* 151 * This cpu isn't halted, but it's idle or undergoing a 152 * context switch. No need to awaken anyone else. 153 */ 154 if (cp->cpu_thread == cp->cpu_idle_thread || 155 cp->cpu_disp_flags & CPU_DISP_DONTSTEAL) 156 return; 157 } 158 159 /* 160 * No need to wake up other CPUs if the thread we just enqueued 161 * is bound. 162 */ 163 if (bound) 164 return; 165 166 167 /* 168 * See if there's any other halted CPUs. If there are, then 169 * select one, and awaken it. 170 * It's possible that after we find a CPU, somebody else 171 * will awaken it before we get the chance. 172 * In that case, look again. 173 */ 174 do { 175 cpu_found = bitset_find(&cpu_part->cp_haltset); 176 if (cpu_found == (uint_t)-1) 177 return; 178 179 } while (bitset_atomic_test_and_del(&cpu_part->cp_haltset, 180 cpu_found) < 0); 181 182 /* 183 * Must use correct wakeup mechanism to avoid lost wakeup of 184 * alternate cpu. 185 */ 186 if (cpu_found != CPU->cpu_seqid) { 187 mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait; 188 if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED)) 189 MWAIT_WAKEUP(cpu_seq[cpu_found]); 190 else 191 poke_cpu(cpu_seq[cpu_found]->cpu_id); 192 } 193 } 194 195 /* 196 * enter deep c-state handler 197 */ 198 static void 199 acpi_cpu_cstate(cpu_acpi_cstate_t *cstate) 200 { 201 volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait; 202 cpu_t *cpup = CPU; 203 processorid_t cpu_sid = cpup->cpu_seqid; 204 cpupart_t *cp = cpup->cpu_part; 205 hrtime_t lapic_expire; 206 uint8_t type = cstate->cs_addrspace_id; 207 uint32_t cs_type = cstate->cs_type; 208 int hset_update = 1; 209 boolean_t using_hpet_timer; 210 211 /* 212 * Set our mcpu_mwait here, so we can tell if anyone tries to 213 * wake us between now and when we call mwait. No other cpu will 214 * attempt to set our mcpu_mwait until we add ourself to the haltset. 215 */ 216 if (mcpu_mwait) { 217 if (type == ACPI_ADR_SPACE_SYSTEM_IO) 218 *mcpu_mwait = MWAIT_WAKEUP_IPI; 219 else 220 *mcpu_mwait = MWAIT_HALTED; 221 } 222 223 /* 224 * If this CPU is online, and there are multiple CPUs 225 * in the system, then we should note our halting 226 * by adding ourselves to the partition's halted CPU 227 * bitmap. This allows other CPUs to find/awaken us when 228 * work becomes available. 229 */ 230 if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1) 231 hset_update = 0; 232 233 /* 234 * Add ourselves to the partition's halted CPUs bitmask 235 * and set our HALTED flag, if necessary. 236 * 237 * When a thread becomes runnable, it is placed on the queue 238 * and then the halted cpuset is checked to determine who 239 * (if anyone) should be awakened. We therefore need to first 240 * add ourselves to the halted cpuset, and and then check if there 241 * is any work available. 242 * 243 * Note that memory barriers after updating the HALTED flag 244 * are not necessary since an atomic operation (updating the bitmap) 245 * immediately follows. On x86 the atomic operation acts as a 246 * memory barrier for the update of cpu_disp_flags. 247 */ 248 if (hset_update) { 249 cpup->cpu_disp_flags |= CPU_DISP_HALTED; 250 bitset_atomic_add(&cp->cp_haltset, cpu_sid); 251 } 252 253 /* 254 * Check to make sure there's really nothing to do. 255 * Work destined for this CPU may become available after 256 * this check. We'll be notified through the clearing of our 257 * bit in the halted CPU bitmask, and a write to our mcpu_mwait. 258 * 259 * disp_anywork() checks disp_nrunnable, so we do not have to later. 260 */ 261 if (disp_anywork()) { 262 if (hset_update) { 263 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 264 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 265 } 266 return; 267 } 268 269 /* 270 * We're on our way to being halted. 271 * 272 * The local APIC timer can stop in ACPI C2 and deeper c-states. 273 * Program the HPET hardware to substitute for this CPU's lAPIC timer. 274 * hpet.use_hpet_timer() disables the LAPIC Timer. Make sure to 275 * start the LAPIC Timer again before leaving this function. 276 * 277 * hpet.use_hpet_timer disables interrupts, so we will awaken 278 * immediately after halting if someone tries to poke us between now 279 * and the time we actually halt. 280 */ 281 using_hpet_timer = hpet.use_hpet_timer(&lapic_expire); 282 283 /* 284 * We check for the presence of our bit after disabling interrupts. 285 * If it's cleared, we'll return. If the bit is cleared after 286 * we check then the cstate_wakeup() will pop us out of the halted 287 * state. 288 * 289 * This means that the ordering of the cstate_wakeup() and the clearing 290 * of the bit by cpu_wakeup is important. 291 * cpu_wakeup() must clear our mc_haltset bit, and then call 292 * cstate_wakeup(). 293 * acpi_cpu_cstate() must disable interrupts, then check for the bit. 294 */ 295 if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) { 296 hpet.use_lapic_timer(lapic_expire); 297 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 298 return; 299 } 300 301 /* 302 * The check for anything locally runnable is here for performance 303 * and isn't needed for correctness. disp_nrunnable ought to be 304 * in our cache still, so it's inexpensive to check, and if there 305 * is anything runnable we won't have to wait for the poke. 306 */ 307 if (cpup->cpu_disp->disp_nrunnable != 0) { 308 hpet.use_lapic_timer(lapic_expire); 309 if (hset_update) { 310 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 311 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 312 } 313 return; 314 } 315 316 if (using_hpet_timer == B_FALSE) { 317 318 hpet.use_lapic_timer(lapic_expire); 319 320 /* 321 * We are currently unable to program the HPET to act as this 322 * CPU's proxy lAPIC timer. This CPU cannot enter C2 or deeper 323 * because no timer is set to wake it up while its lAPIC timer 324 * stalls in deep C-States. 325 * Enter C1 instead. 326 * 327 * cstate_wake_cpu() will wake this CPU with an IPI which 328 * works with MWAIT. 329 */ 330 i86_monitor(mcpu_mwait, 0, 0); 331 if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) { 332 cpu_dtrace_idle_probe(CPU_ACPI_C1); 333 334 tlb_going_idle(); 335 i86_mwait(0, 0); 336 tlb_service(); 337 338 cpu_dtrace_idle_probe(CPU_ACPI_C0); 339 } 340 341 /* 342 * We're no longer halted 343 */ 344 if (hset_update) { 345 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 346 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 347 } 348 return; 349 } 350 351 cpu_dtrace_idle_probe((uint_t)cs_type); 352 353 if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) { 354 /* 355 * We're on our way to being halted. 356 * To avoid a lost wakeup, arm the monitor before checking 357 * if another cpu wrote to mcpu_mwait to wake us up. 358 */ 359 i86_monitor(mcpu_mwait, 0, 0); 360 if (*mcpu_mwait == MWAIT_HALTED) { 361 uint32_t eax = cstate->cs_address; 362 uint32_t ecx = 1; 363 364 tlb_going_idle(); 365 i86_mwait(eax, ecx); 366 tlb_service(); 367 } 368 } else if (type == ACPI_ADR_SPACE_SYSTEM_IO) { 369 uint32_t value; 370 ACPI_TABLE_FADT *gbl_FADT; 371 372 if (*mcpu_mwait == MWAIT_WAKEUP_IPI) { 373 tlb_going_idle(); 374 (void) cpu_acpi_read_port(cstate->cs_address, 375 &value, 8); 376 acpica_get_global_FADT(&gbl_FADT); 377 (void) cpu_acpi_read_port( 378 gbl_FADT->XPmTimerBlock.Address, &value, 32); 379 tlb_service(); 380 } 381 } 382 383 /* 384 * The lAPIC timer may have stopped in deep c-state. 385 * Reprogram this CPU's lAPIC here before enabling interrupts. 386 */ 387 hpet.use_lapic_timer(lapic_expire); 388 389 cpu_dtrace_idle_probe(CPU_ACPI_C0); 390 391 /* 392 * We're no longer halted 393 */ 394 if (hset_update) { 395 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 396 bitset_atomic_del(&cp->cp_haltset, cpu_sid); 397 } 398 } 399 400 /* 401 * indicate when bus masters are active 402 */ 403 static uint32_t 404 cpu_acpi_bm_sts(void) 405 { 406 uint32_t bm_sts = 0; 407 408 cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_sts); 409 410 if (bm_sts) 411 cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); 412 413 return (bm_sts); 414 } 415 416 /* 417 * Idle the present CPU, deep c-state is supported 418 */ 419 void 420 cpu_acpi_idle(void) 421 { 422 cpu_t *cp = CPU; 423 cpu_acpi_handle_t handle; 424 cma_c_state_t *cs_data; 425 cpu_acpi_cstate_t *cstates; 426 hrtime_t start, end; 427 int cpu_max_cstates; 428 uint32_t cs_indx; 429 uint16_t cs_type; 430 431 cpupm_mach_state_t *mach_state = 432 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 433 handle = mach_state->ms_acpi_handle; 434 ASSERT(CPU_ACPI_CSTATES(handle) != NULL); 435 436 cs_data = mach_state->ms_cstate.cma_state.cstate; 437 cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 438 ASSERT(cstates != NULL); 439 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 440 if (cpu_max_cstates > CPU_MAX_CSTATES) 441 cpu_max_cstates = CPU_MAX_CSTATES; 442 if (cpu_max_cstates == 1) { /* no ACPI c-state data */ 443 (*non_deep_idle_cpu)(); 444 return; 445 } 446 447 start = gethrtime_unscaled(); 448 449 cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start); 450 451 /* 452 * OSPM uses the BM_STS bit to determine the power state to enter 453 * when considering a transition to or from the C2/C3 power state. 454 * if C3 is determined, bus master activity demotes the power state 455 * to C2. 456 */ 457 if ((cstates[cs_indx].cs_type >= CPU_ACPI_C3) && cpu_acpi_bm_sts()) 458 --cs_indx; 459 cs_type = cstates[cs_indx].cs_type; 460 461 /* 462 * BM_RLD determines if the Cx power state was exited as a result of 463 * bus master requests. Set this bit when using a C3 power state, and 464 * clear it when using a C1 or C2 power state. 465 */ 466 if ((CPU_ACPI_BM_INFO(handle) & BM_RLD) && (cs_type < CPU_ACPI_C3)) { 467 cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); 468 CPU_ACPI_BM_INFO(handle) &= ~BM_RLD; 469 } 470 471 if ((!(CPU_ACPI_BM_INFO(handle) & BM_RLD)) && 472 (cs_type >= CPU_ACPI_C3)) { 473 cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); 474 CPU_ACPI_BM_INFO(handle) |= BM_RLD; 475 } 476 477 switch (cs_type) { 478 default: 479 /* FALLTHROUGH */ 480 case CPU_ACPI_C1: 481 (*non_deep_idle_cpu)(); 482 break; 483 484 case CPU_ACPI_C2: 485 acpi_cpu_cstate(&cstates[cs_indx]); 486 break; 487 488 case CPU_ACPI_C3: 489 /* 490 * recommended in ACPI spec, providing hardware mechanisms 491 * to prevent master from writing to memory (UP-only) 492 */ 493 if ((ncpus_online == 1) && 494 (CPU_ACPI_BM_INFO(handle) & BM_CTL)) { 495 cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); 496 CPU_ACPI_BM_INFO(handle) |= BM_ARB_DIS; 497 /* 498 * Today all Intel's processor support C3 share cache. 499 */ 500 } else if (x86_vendor != X86_VENDOR_Intel) { 501 __acpi_wbinvd(); 502 } 503 acpi_cpu_cstate(&cstates[cs_indx]); 504 if (CPU_ACPI_BM_INFO(handle) & BM_ARB_DIS) { 505 cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); 506 CPU_ACPI_BM_INFO(handle) &= ~BM_ARB_DIS; 507 } 508 break; 509 } 510 511 end = gethrtime_unscaled(); 512 513 /* 514 * Update statistics 515 */ 516 cpupm_wakeup_cstate_data(cs_data, end); 517 } 518 519 boolean_t 520 cpu_deep_cstates_supported(void) 521 { 522 extern int idle_cpu_no_deep_c; 523 524 if (idle_cpu_no_deep_c) 525 return (B_FALSE); 526 527 if (!cpuid_deep_cstates_supported()) 528 return (B_FALSE); 529 530 if ((hpet.supported != HPET_FULL_SUPPORT) || !hpet.install_proxy()) 531 return (B_FALSE); 532 533 return (B_TRUE); 534 } 535 536 /* 537 * Validate that this processor supports deep cstate and if so, 538 * get the c-state data from ACPI and cache it. 539 */ 540 static int 541 cpu_idle_init(cpu_t *cp) 542 { 543 cpupm_mach_state_t *mach_state = 544 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 545 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 546 cpu_acpi_cstate_t *cstate; 547 char name[KSTAT_STRLEN]; 548 int cpu_max_cstates, i; 549 ACPI_TABLE_FADT *gbl_FADT; 550 551 /* 552 * Cache the C-state specific ACPI data. 553 */ 554 if (cpu_acpi_cache_cstate_data(handle) != 0) { 555 cmn_err(CE_NOTE, 556 "!cpu_idle_init: Failed to cache ACPI C-state data\n"); 557 cpu_idle_fini(cp); 558 return (-1); 559 } 560 561 /* 562 * Check the bus master arbitration control ability. 563 */ 564 acpica_get_global_FADT(&gbl_FADT); 565 if (gbl_FADT->Pm2ControlBlock && gbl_FADT->Pm2ControlLength) 566 CPU_ACPI_BM_INFO(handle) |= BM_CTL; 567 568 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 569 570 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 571 572 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) { 573 (void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type); 574 /* 575 * Allocate, initialize and install cstate kstat 576 */ 577 cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id, 578 name, "misc", 579 KSTAT_TYPE_NAMED, 580 sizeof (cpu_idle_kstat) / sizeof (kstat_named_t), 581 KSTAT_FLAG_VIRTUAL); 582 583 if (cstate->cs_ksp == NULL) { 584 cmn_err(CE_NOTE, "kstat_create(c_state) fail"); 585 } else { 586 cstate->cs_ksp->ks_data = &cpu_idle_kstat; 587 cstate->cs_ksp->ks_lock = &cpu_idle_mutex; 588 cstate->cs_ksp->ks_update = cpu_idle_kstat_update; 589 cstate->cs_ksp->ks_data_size += MAXNAMELEN; 590 cstate->cs_ksp->ks_private = cstate; 591 kstat_install(cstate->cs_ksp); 592 cstate++; 593 } 594 } 595 596 cpupm_alloc_domains(cp, CPUPM_C_STATES); 597 cpupm_alloc_ms_cstate(cp); 598 599 if (cpu_deep_cstates_supported()) { 600 mutex_enter(&cpu_idle_callb_mutex); 601 if (cpu_deep_idle_callb_id == (callb_id_t)0) 602 cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb, 603 (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle"); 604 if (cpu_idle_cpr_callb_id == (callb_id_t)0) 605 cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb, 606 (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr"); 607 mutex_exit(&cpu_idle_callb_mutex); 608 } 609 610 return (0); 611 } 612 613 /* 614 * Free resources allocated by cpu_idle_init(). 615 */ 616 static void 617 cpu_idle_fini(cpu_t *cp) 618 { 619 cpupm_mach_state_t *mach_state = 620 (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 621 cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 622 cpu_acpi_cstate_t *cstate; 623 uint_t cpu_max_cstates, i; 624 625 /* 626 * idle cpu points back to the generic one 627 */ 628 idle_cpu = CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; 629 disp_enq_thread = non_deep_idle_disp_enq_thread; 630 631 cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); 632 if (cstate) { 633 cpu_max_cstates = cpu_acpi_get_max_cstates(handle); 634 635 for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) { 636 if (cstate->cs_ksp != NULL) 637 kstat_delete(cstate->cs_ksp); 638 cstate++; 639 } 640 } 641 642 cpupm_free_ms_cstate(cp); 643 cpupm_free_domains(&cpupm_cstate_domains); 644 cpu_acpi_free_cstate_data(handle); 645 646 mutex_enter(&cpu_idle_callb_mutex); 647 if (cpu_deep_idle_callb_id != (callb_id_t)0) { 648 (void) callb_delete(cpu_deep_idle_callb_id); 649 cpu_deep_idle_callb_id = (callb_id_t)0; 650 } 651 if (cpu_idle_cpr_callb_id != (callb_id_t)0) { 652 (void) callb_delete(cpu_idle_cpr_callb_id); 653 cpu_idle_cpr_callb_id = (callb_id_t)0; 654 } 655 mutex_exit(&cpu_idle_callb_mutex); 656 } 657 658 /*ARGSUSED*/ 659 static boolean_t 660 cpu_deep_idle_callb(void *arg, int code) 661 { 662 boolean_t rslt = B_TRUE; 663 664 mutex_enter(&cpu_idle_callb_mutex); 665 switch (code) { 666 case PM_DEFAULT_CPU_DEEP_IDLE: 667 /* 668 * Default policy is same as enable 669 */ 670 /*FALLTHROUGH*/ 671 case PM_ENABLE_CPU_DEEP_IDLE: 672 if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0) 673 break; 674 675 if (hpet.callback(PM_ENABLE_CPU_DEEP_IDLE)) { 676 disp_enq_thread = cstate_wakeup; 677 idle_cpu = cpu_idle_adaptive; 678 cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG; 679 } else { 680 rslt = B_FALSE; 681 } 682 break; 683 684 case PM_DISABLE_CPU_DEEP_IDLE: 685 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) 686 break; 687 688 idle_cpu = non_deep_idle_cpu; 689 if (hpet.callback(PM_DISABLE_CPU_DEEP_IDLE)) { 690 disp_enq_thread = non_deep_idle_disp_enq_thread; 691 cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG; 692 } 693 break; 694 695 default: 696 cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n", 697 code); 698 break; 699 } 700 mutex_exit(&cpu_idle_callb_mutex); 701 return (rslt); 702 } 703 704 /*ARGSUSED*/ 705 static boolean_t 706 cpu_idle_cpr_callb(void *arg, int code) 707 { 708 boolean_t rslt = B_TRUE; 709 710 mutex_enter(&cpu_idle_callb_mutex); 711 switch (code) { 712 case CB_CODE_CPR_RESUME: 713 if (hpet.callback(CB_CODE_CPR_RESUME)) { 714 /* 715 * Do not enable dispatcher hooks if disabled by user. 716 */ 717 if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) 718 break; 719 720 disp_enq_thread = cstate_wakeup; 721 idle_cpu = cpu_idle_adaptive; 722 } else { 723 rslt = B_FALSE; 724 } 725 break; 726 727 case CB_CODE_CPR_CHKPT: 728 idle_cpu = non_deep_idle_cpu; 729 disp_enq_thread = non_deep_idle_disp_enq_thread; 730 hpet.callback(CB_CODE_CPR_CHKPT); 731 break; 732 733 default: 734 cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code); 735 break; 736 } 737 mutex_exit(&cpu_idle_callb_mutex); 738 return (rslt); 739 } 740 741 /* 742 * handle _CST notification 743 */ 744 void 745 cpuidle_cstate_instance(cpu_t *cp) 746 { 747 #ifndef __xpv 748 cpupm_mach_state_t *mach_state = 749 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 750 cpu_acpi_handle_t handle; 751 struct machcpu *mcpu; 752 cpuset_t dom_cpu_set; 753 kmutex_t *pm_lock; 754 int result = 0; 755 processorid_t cpu_id; 756 757 if (mach_state == NULL) { 758 return; 759 } 760 761 ASSERT(mach_state->ms_cstate.cma_domain != NULL); 762 dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus; 763 pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock; 764 765 /* 766 * Do for all the CPU's in the domain 767 */ 768 mutex_enter(pm_lock); 769 do { 770 CPUSET_FIND(dom_cpu_set, cpu_id); 771 if (cpu_id == CPUSET_NOTINSET) 772 break; 773 774 ASSERT(cpu_id >= 0 && cpu_id < NCPU); 775 cp = cpu[cpu_id]; 776 mach_state = (cpupm_mach_state_t *) 777 cp->cpu_m.mcpu_pm_mach_state; 778 if (!(mach_state->ms_caps & CPUPM_C_STATES)) { 779 mutex_exit(pm_lock); 780 return; 781 } 782 handle = mach_state->ms_acpi_handle; 783 ASSERT(handle != NULL); 784 785 /* 786 * re-evaluate cstate object 787 */ 788 if (cpu_acpi_cache_cstate_data(handle) != 0) { 789 cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state" 790 " object Instance: %d", cpu_id); 791 } 792 mutex_enter(&cpu_lock); 793 mcpu = &(cp->cpu_m); 794 mcpu->max_cstates = cpu_acpi_get_max_cstates(handle); 795 if (mcpu->max_cstates > CPU_ACPI_C1) { 796 hpet.callback(CST_EVENT_MULTIPLE_CSTATES); 797 disp_enq_thread = cstate_wakeup; 798 cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 799 } else if (mcpu->max_cstates == CPU_ACPI_C1) { 800 disp_enq_thread = non_deep_idle_disp_enq_thread; 801 cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu; 802 hpet.callback(CST_EVENT_ONE_CSTATE); 803 } 804 mutex_exit(&cpu_lock); 805 806 CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result); 807 mutex_exit(pm_lock); 808 } while (result < 0); 809 #endif 810 } 811 812 /* 813 * handle the number or the type of available processor power states change 814 */ 815 void 816 cpuidle_manage_cstates(void *ctx) 817 { 818 cpu_t *cp = ctx; 819 processorid_t cpu_id = cp->cpu_id; 820 cpupm_mach_state_t *mach_state = 821 (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 822 boolean_t is_ready; 823 824 if (mach_state == NULL) { 825 return; 826 } 827 828 /* 829 * We currently refuse to power manage if the CPU is not ready to 830 * take cross calls (cross calls fail silently if CPU is not ready 831 * for it). 832 * 833 * Additionally, for x86 platforms we cannot power manage 834 * any one instance, until all instances have been initialized. 835 * That's because we don't know what the CPU domains look like 836 * until all instances have been initialized. 837 */ 838 is_ready = CPUPM_XCALL_IS_READY(cpu_id) && cpupm_cstate_ready(); 839 if (!is_ready) 840 return; 841 842 cpuidle_cstate_instance(cp); 843 } 844