10e751525SEric Saxe /* 20e751525SEric Saxe * CDDL HEADER START 30e751525SEric Saxe * 40e751525SEric Saxe * The contents of this file are subject to the terms of the 50e751525SEric Saxe * Common Development and Distribution License (the "License"). 60e751525SEric Saxe * You may not use this file except in compliance with the License. 70e751525SEric Saxe * 80e751525SEric Saxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90e751525SEric Saxe * or http://www.opensolaris.org/os/licensing. 100e751525SEric Saxe * See the License for the specific language governing permissions 110e751525SEric Saxe * and limitations under the License. 120e751525SEric Saxe * 130e751525SEric Saxe * When distributing Covered Code, include this CDDL HEADER in each 140e751525SEric Saxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150e751525SEric Saxe * If applicable, add the following below this CDDL HEADER, with the 160e751525SEric Saxe * fields enclosed by brackets "[]" replaced with your own identifying 170e751525SEric Saxe * information: Portions Copyright [yyyy] [name of copyright owner] 180e751525SEric Saxe * 190e751525SEric Saxe * CDDL HEADER END 200e751525SEric Saxe */ 210e751525SEric Saxe /* 220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230e751525SEric Saxe * Use is subject to license terms. 240e751525SEric Saxe */ 25cef70d2cSBill Holler /* 26cef70d2cSBill Holler * Copyright (c) 2009, Intel Corporation. 27cef70d2cSBill Holler * All rights reserved. 
 */

#include <sys/cpu_pm.h>
#include <sys/x86_archext.h>
#include <sys/sdt.h>
#include <sys/spl.h>
#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/hpet.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpupm.h>
#include <sys/cpu_idle.h>
#include <sys/cpu_acpi.h>
#include <sys/cpupm_throttle.h>
#include <sys/dtrace.h>
#include <sys/note.h>

/*
 * This callback is used to build the PPM CPU domains once
 * a CPU device has been started. The callback is initialized
 * by the PPM driver to point to a routine that will build the
 * domains.
 */
void (*cpupm_ppm_alloc_pstate_domains)(cpu_t *);

/*
 * This callback is used to remove CPU from the PPM CPU domains
 * when the cpu driver is detached. The callback is initialized
 * by the PPM driver to point to a routine that will remove CPU
 * from the domains.
 */
void (*cpupm_ppm_free_pstate_domains)(cpu_t *);

/*
 * This callback is used to redefine the topspeed for a CPU device.
 * Since all CPUs in a domain should have identical properties, this
 * callback is initialized by the PPM driver to point to a routine
 * that will redefine the topspeed for all devices in a CPU domain.
 * This callback is exercised whenever an ACPI _PPC change notification
 * is received by the CPU driver.
 */
void (*cpupm_redefine_topspeed)(void *);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to set a new topspeed for a CPU.
 */
void (*cpupm_set_topspeed_callb)(void *, int);

/*
 * This callback is used by the PPM driver to call into the CPU driver
 * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
 */
int (*cpupm_get_topspeed_callb)(void *);

static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
static void cpupm_free_notify_handlers(cpu_t *);
static void cpupm_power_manage_notifications(void *);

/*
 * Until proven otherwise, all power states are manageable.
 */
static uint32_t cpupm_enabled = CPUPM_ALL_STATES;

/* Per-state-type lists of power domains, shared by all CPUs. */
cpupm_state_domains_t *cpupm_pstate_domains = NULL;
cpupm_state_domains_t *cpupm_tstate_domains = NULL;
cpupm_state_domains_t *cpupm_cstate_domains = NULL;

/*
 * c-state tunables
 *
 * cpupm_cs_sample_interval is the length of time we wait before
 * recalculating c-state statistics.  When a CPU goes idle it checks
 * to see if it has been longer than cpupm_cs_sample_interval since it last
 * calculated which C-state to go to.
 *
 * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle
 * divided by time spent in the idle state transitions.
 * A value of 10 means the CPU will not spend more than 1/10 of its time
 * in idle latency.  The worst case performance will be 90% of non Deep C-state
 * kernel.
 *
 * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
 * before it is worth going there.  Expressed as a multiple of latency.
 */
uint32_t cpupm_cs_sample_interval = 100*1000*1000;	/* 100 milliseconds */
uint32_t cpupm_cs_idle_cost_tunable = 10;	/* work time / latency cost */
uint32_t cpupm_cs_idle_save_tunable = 2;	/* idle power savings */
uint16_t cpupm_C2_idle_pct_tunable = 70;
uint16_t cpupm_C3_idle_pct_tunable = 80;

#ifndef __xpv
extern boolean_t cpupm_intel_init(cpu_t *);
extern boolean_t cpupm_amd_init(cpu_t *);

typedef struct cpupm_vendor {
	boolean_t	(*cpuv_init)(cpu_t *);
} cpupm_vendor_t;

/*
 * Table of supported vendors.  NULL-terminated; a vendor's init routine
 * returns B_TRUE when it claims the CPU.
 */
static cpupm_vendor_t cpupm_vendors[] = {
	cpupm_intel_init,
	cpupm_amd_init,
	NULL
};
#endif

/*
 * Initialize the machine.
 * See if a module exists for managing power for this CPU.
1410e751525SEric Saxe */ 1420e751525SEric Saxe /*ARGSUSED*/ 1430e751525SEric Saxe void 1440e751525SEric Saxe cpupm_init(cpu_t *cp) 1450e751525SEric Saxe { 1460e751525SEric Saxe #ifndef __xpv 1470e751525SEric Saxe cpupm_vendor_t *vendors; 1480e751525SEric Saxe cpupm_mach_state_t *mach_state; 1490e751525SEric Saxe struct machcpu *mcpu = &(cp->cpu_m); 15078d5422cSMark Haywood static boolean_t first = B_TRUE; 1510e751525SEric Saxe int *speeds; 1520e751525SEric Saxe uint_t nspeeds; 1530e751525SEric Saxe int ret; 1540e751525SEric Saxe 1550e751525SEric Saxe mach_state = cp->cpu_m.mcpu_pm_mach_state = 1560e751525SEric Saxe kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP); 1570e751525SEric Saxe mach_state->ms_caps = CPUPM_NO_STATES; 1580e751525SEric Saxe mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL); 1590e751525SEric Saxe 1600e751525SEric Saxe mach_state->ms_acpi_handle = cpu_acpi_init(cp); 1610e751525SEric Saxe if (mach_state->ms_acpi_handle == NULL) { 162444f66e7SMark Haywood cpupm_fini(cp); 1630e751525SEric Saxe cmn_err(CE_WARN, "!cpupm_init: processor %d: " 1640e751525SEric Saxe "unable to get ACPI handle", cp->cpu_id); 1650e751525SEric Saxe cmn_err(CE_NOTE, "!CPU power management will not function."); 1660e751525SEric Saxe CPUPM_DISABLE(); 16778d5422cSMark Haywood first = B_FALSE; 1680e751525SEric Saxe return; 1690e751525SEric Saxe } 1700e751525SEric Saxe 1710e751525SEric Saxe /* 1720e751525SEric Saxe * Loop through the CPU management module table and see if 1730e751525SEric Saxe * any of the modules implement CPU power management 1740e751525SEric Saxe * for this CPU. 1750e751525SEric Saxe */ 1760e751525SEric Saxe for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) { 1770e751525SEric Saxe if (vendors->cpuv_init(cp)) 1780e751525SEric Saxe break; 1790e751525SEric Saxe } 1800e751525SEric Saxe 1810e751525SEric Saxe /* 1820e751525SEric Saxe * Nope, we can't power manage this CPU. 
1830e751525SEric Saxe */ 1840e751525SEric Saxe if (vendors == NULL) { 185444f66e7SMark Haywood cpupm_fini(cp); 1860e751525SEric Saxe CPUPM_DISABLE(); 18778d5422cSMark Haywood first = B_FALSE; 1880e751525SEric Saxe return; 1890e751525SEric Saxe } 1900e751525SEric Saxe 1910e751525SEric Saxe /* 1920e751525SEric Saxe * If P-state support exists for this system, then initialize it. 1930e751525SEric Saxe */ 1940e751525SEric Saxe if (mach_state->ms_pstate.cma_ops != NULL) { 1950e751525SEric Saxe ret = mach_state->ms_pstate.cma_ops->cpus_init(cp); 1960e751525SEric Saxe if (ret != 0) { 1970e751525SEric Saxe mach_state->ms_pstate.cma_ops = NULL; 1980e751525SEric Saxe cpupm_disable(CPUPM_P_STATES); 1990e751525SEric Saxe } else { 2000e751525SEric Saxe nspeeds = cpupm_get_speeds(cp, &speeds); 2010e751525SEric Saxe if (nspeeds == 0) { 20200f97612SMark Haywood cmn_err(CE_NOTE, "!cpupm_init: processor %d:" 2030e751525SEric Saxe " no speeds to manage", cp->cpu_id); 2040e751525SEric Saxe } else { 2050e751525SEric Saxe cpupm_set_supp_freqs(cp, speeds, nspeeds); 2060e751525SEric Saxe cpupm_free_speeds(speeds, nspeeds); 2070e751525SEric Saxe mach_state->ms_caps |= CPUPM_P_STATES; 2080e751525SEric Saxe } 2090e751525SEric Saxe } 21029091f17SAnup Pemmaiah } else { 21129091f17SAnup Pemmaiah cpupm_disable(CPUPM_P_STATES); 2120e751525SEric Saxe } 2130e751525SEric Saxe 2140e751525SEric Saxe if (mach_state->ms_tstate.cma_ops != NULL) { 2150e751525SEric Saxe ret = mach_state->ms_tstate.cma_ops->cpus_init(cp); 2160e751525SEric Saxe if (ret != 0) { 2170e751525SEric Saxe mach_state->ms_tstate.cma_ops = NULL; 2180e751525SEric Saxe cpupm_disable(CPUPM_T_STATES); 2190e751525SEric Saxe } else { 2200e751525SEric Saxe mach_state->ms_caps |= CPUPM_T_STATES; 2210e751525SEric Saxe } 22229091f17SAnup Pemmaiah } else { 22329091f17SAnup Pemmaiah cpupm_disable(CPUPM_T_STATES); 2240e751525SEric Saxe } 2250e751525SEric Saxe 2260e751525SEric Saxe /* 2270e751525SEric Saxe * If C-states support exists for this 
system, then initialize it. 2280e751525SEric Saxe */ 2290e751525SEric Saxe if (mach_state->ms_cstate.cma_ops != NULL) { 2300e751525SEric Saxe ret = mach_state->ms_cstate.cma_ops->cpus_init(cp); 2310e751525SEric Saxe if (ret != 0) { 2320e751525SEric Saxe mach_state->ms_cstate.cma_ops = NULL; 2330e751525SEric Saxe mcpu->max_cstates = CPU_ACPI_C1; 2340e751525SEric Saxe cpupm_disable(CPUPM_C_STATES); 2350e751525SEric Saxe idle_cpu = non_deep_idle_cpu; 2360e751525SEric Saxe disp_enq_thread = non_deep_idle_disp_enq_thread; 2370e751525SEric Saxe } else if (cpu_deep_cstates_supported()) { 2380e751525SEric Saxe mcpu->max_cstates = cpu_acpi_get_max_cstates( 2390e751525SEric Saxe mach_state->ms_acpi_handle); 2400e751525SEric Saxe if (mcpu->max_cstates > CPU_ACPI_C1) { 241cef70d2cSBill Holler (void) cstate_timer_callback( 242cef70d2cSBill Holler CST_EVENT_MULTIPLE_CSTATES); 243*a3114836SGerry Liu cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle; 2440e751525SEric Saxe mcpu->mcpu_idle_type = CPU_ACPI_C1; 2450e751525SEric Saxe disp_enq_thread = cstate_wakeup; 2460e751525SEric Saxe } else { 247cef70d2cSBill Holler (void) cstate_timer_callback( 248cef70d2cSBill Holler CST_EVENT_ONE_CSTATE); 2490e751525SEric Saxe } 2500e751525SEric Saxe mach_state->ms_caps |= CPUPM_C_STATES; 2510e751525SEric Saxe } else { 2520e751525SEric Saxe mcpu->max_cstates = CPU_ACPI_C1; 2530e751525SEric Saxe idle_cpu = non_deep_idle_cpu; 2540e751525SEric Saxe disp_enq_thread = non_deep_idle_disp_enq_thread; 2550e751525SEric Saxe } 25629091f17SAnup Pemmaiah } else { 25729091f17SAnup Pemmaiah cpupm_disable(CPUPM_C_STATES); 2580e751525SEric Saxe } 2590e751525SEric Saxe 2600e751525SEric Saxe 2610e751525SEric Saxe if (mach_state->ms_caps == CPUPM_NO_STATES) { 262444f66e7SMark Haywood cpupm_fini(cp); 2630e751525SEric Saxe CPUPM_DISABLE(); 26478d5422cSMark Haywood first = B_FALSE; 2650e751525SEric Saxe return; 2660e751525SEric Saxe } 2670e751525SEric Saxe 2680e751525SEric Saxe if ((mach_state->ms_caps & CPUPM_T_STATES) || 
2690e751525SEric Saxe (mach_state->ms_caps & CPUPM_P_STATES) || 27078d5422cSMark Haywood (mach_state->ms_caps & CPUPM_C_STATES)) { 27178d5422cSMark Haywood if (first) { 27278d5422cSMark Haywood acpica_write_cpupm_capabilities( 27378d5422cSMark Haywood mach_state->ms_caps & CPUPM_P_STATES, 27478d5422cSMark Haywood mach_state->ms_caps & CPUPM_C_STATES); 27578d5422cSMark Haywood } 27629091f17SAnup Pemmaiah if (mach_state->ms_caps & CPUPM_T_STATES) { 27741333a9eSMark Haywood cpupm_throttle_manage_notification(cp); 27829091f17SAnup Pemmaiah } 27929091f17SAnup Pemmaiah if (mach_state->ms_caps & CPUPM_C_STATES) { 28041333a9eSMark Haywood cpuidle_manage_cstates(cp); 28129091f17SAnup Pemmaiah } 28229091f17SAnup Pemmaiah if (mach_state->ms_caps & CPUPM_P_STATES) { 28341333a9eSMark Haywood cpupm_power_manage_notifications(cp); 28429091f17SAnup Pemmaiah } 28541333a9eSMark Haywood cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp); 28678d5422cSMark Haywood } 28778d5422cSMark Haywood first = B_FALSE; 2880e751525SEric Saxe #endif 2890e751525SEric Saxe } 2900e751525SEric Saxe 2910e751525SEric Saxe /* 292444f66e7SMark Haywood * Free any resources allocated during cpupm initialization or cpupm start. 
2930e751525SEric Saxe */ 2940e751525SEric Saxe /*ARGSUSED*/ 2950e751525SEric Saxe void 296444f66e7SMark Haywood cpupm_free(cpu_t *cp, boolean_t cpupm_stop) 2970e751525SEric Saxe { 2980e751525SEric Saxe #ifndef __xpv 2990e751525SEric Saxe cpupm_mach_state_t *mach_state = 3000e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 3010e751525SEric Saxe 3020e751525SEric Saxe if (mach_state == NULL) 3030e751525SEric Saxe return; 304444f66e7SMark Haywood 3050e751525SEric Saxe if (mach_state->ms_pstate.cma_ops != NULL) { 306444f66e7SMark Haywood if (cpupm_stop) 307444f66e7SMark Haywood mach_state->ms_pstate.cma_ops->cpus_stop(cp); 308444f66e7SMark Haywood else 3090e751525SEric Saxe mach_state->ms_pstate.cma_ops->cpus_fini(cp); 3100e751525SEric Saxe mach_state->ms_pstate.cma_ops = NULL; 3110e751525SEric Saxe } 3120e751525SEric Saxe 3130e751525SEric Saxe if (mach_state->ms_tstate.cma_ops != NULL) { 314444f66e7SMark Haywood if (cpupm_stop) 315444f66e7SMark Haywood mach_state->ms_tstate.cma_ops->cpus_stop(cp); 316444f66e7SMark Haywood else 3170e751525SEric Saxe mach_state->ms_tstate.cma_ops->cpus_fini(cp); 3180e751525SEric Saxe mach_state->ms_tstate.cma_ops = NULL; 3190e751525SEric Saxe } 3200e751525SEric Saxe 3210e751525SEric Saxe if (mach_state->ms_cstate.cma_ops != NULL) { 322444f66e7SMark Haywood if (cpupm_stop) 323444f66e7SMark Haywood mach_state->ms_cstate.cma_ops->cpus_stop(cp); 324444f66e7SMark Haywood else 3250e751525SEric Saxe mach_state->ms_cstate.cma_ops->cpus_fini(cp); 326444f66e7SMark Haywood 3270e751525SEric Saxe mach_state->ms_cstate.cma_ops = NULL; 3280e751525SEric Saxe } 3290e751525SEric Saxe 3300e751525SEric Saxe cpupm_free_notify_handlers(cp); 3310e751525SEric Saxe 3320e751525SEric Saxe if (mach_state->ms_acpi_handle != NULL) { 3330e751525SEric Saxe cpu_acpi_fini(mach_state->ms_acpi_handle); 3340e751525SEric Saxe mach_state->ms_acpi_handle = NULL; 3350e751525SEric Saxe } 3360e751525SEric Saxe 3370e751525SEric Saxe 
mutex_destroy(&mach_state->ms_lock); 3380e751525SEric Saxe kmem_free(mach_state, sizeof (cpupm_mach_state_t)); 3390e751525SEric Saxe cp->cpu_m.mcpu_pm_mach_state = NULL; 3400e751525SEric Saxe #endif 3410e751525SEric Saxe } 3420e751525SEric Saxe 343444f66e7SMark Haywood void 344444f66e7SMark Haywood cpupm_fini(cpu_t *cp) 345444f66e7SMark Haywood { 3460e751525SEric Saxe /* 347444f66e7SMark Haywood * call (*cpus_fini)() ops to release the cpupm resource 348444f66e7SMark Haywood * in the P/C/T-state driver 349444f66e7SMark Haywood */ 350444f66e7SMark Haywood cpupm_free(cp, B_FALSE); 351444f66e7SMark Haywood } 352444f66e7SMark Haywood 353444f66e7SMark Haywood void 354444f66e7SMark Haywood cpupm_start(cpu_t *cp) 355444f66e7SMark Haywood { 356444f66e7SMark Haywood cpupm_init(cp); 357444f66e7SMark Haywood } 358444f66e7SMark Haywood 359444f66e7SMark Haywood void 360444f66e7SMark Haywood cpupm_stop(cpu_t *cp) 361444f66e7SMark Haywood { 362444f66e7SMark Haywood /* 363444f66e7SMark Haywood * call (*cpus_stop)() ops to reclaim the cpupm resource 364444f66e7SMark Haywood * in the P/C/T-state driver 365444f66e7SMark Haywood */ 366444f66e7SMark Haywood cpupm_free(cp, B_TRUE); 367444f66e7SMark Haywood } 368444f66e7SMark Haywood 369444f66e7SMark Haywood /* 370444f66e7SMark Haywood * If A CPU has started and at least one power state is manageable, 371444f66e7SMark Haywood * then the CPU is ready for power management. 
3720e751525SEric Saxe */ 3730e751525SEric Saxe boolean_t 374444f66e7SMark Haywood cpupm_is_ready(cpu_t *cp) 3750e751525SEric Saxe { 3760e751525SEric Saxe #ifndef __xpv 377444f66e7SMark Haywood cpupm_mach_state_t *mach_state = 378444f66e7SMark Haywood (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 379444f66e7SMark Haywood uint32_t cpupm_caps = mach_state->ms_caps; 380444f66e7SMark Haywood 3810e751525SEric Saxe if (cpupm_enabled == CPUPM_NO_STATES) 3820e751525SEric Saxe return (B_FALSE); 383444f66e7SMark Haywood 384444f66e7SMark Haywood if ((cpupm_caps & CPUPM_T_STATES) || 385444f66e7SMark Haywood (cpupm_caps & CPUPM_P_STATES) || 386444f66e7SMark Haywood (cpupm_caps & CPUPM_C_STATES)) 387444f66e7SMark Haywood 388444f66e7SMark Haywood return (B_TRUE); 389444f66e7SMark Haywood return (B_FALSE); 3900e751525SEric Saxe #else 391444f66e7SMark Haywood _NOTE(ARGUNUSED(cp)); 3920e751525SEric Saxe return (B_FALSE); 3930e751525SEric Saxe #endif 3940e751525SEric Saxe } 3950e751525SEric Saxe 3960e751525SEric Saxe boolean_t 3970e751525SEric Saxe cpupm_is_enabled(uint32_t state) 3980e751525SEric Saxe { 3990e751525SEric Saxe return ((cpupm_enabled & state) == state); 4000e751525SEric Saxe } 4010e751525SEric Saxe 4020e751525SEric Saxe /* 4030e751525SEric Saxe * By default, all states are enabled. 
4040e751525SEric Saxe */ 4050e751525SEric Saxe void 4060e751525SEric Saxe cpupm_disable(uint32_t state) 4070e751525SEric Saxe { 4080e751525SEric Saxe 4090e751525SEric Saxe if (state & CPUPM_P_STATES) { 4100e751525SEric Saxe cpupm_free_domains(&cpupm_pstate_domains); 4110e751525SEric Saxe } 4120e751525SEric Saxe if (state & CPUPM_T_STATES) { 4130e751525SEric Saxe cpupm_free_domains(&cpupm_tstate_domains); 4140e751525SEric Saxe } 4150e751525SEric Saxe if (state & CPUPM_C_STATES) { 4160e751525SEric Saxe cpupm_free_domains(&cpupm_cstate_domains); 4170e751525SEric Saxe } 4180e751525SEric Saxe cpupm_enabled &= ~state; 4190e751525SEric Saxe } 4200e751525SEric Saxe 4210e751525SEric Saxe /* 4220e751525SEric Saxe * Allocate power domains for C,P and T States 4230e751525SEric Saxe */ 4240e751525SEric Saxe void 4250e751525SEric Saxe cpupm_alloc_domains(cpu_t *cp, int state) 4260e751525SEric Saxe { 4270e751525SEric Saxe cpupm_mach_state_t *mach_state = 4280e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 4290e751525SEric Saxe cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 4300e751525SEric Saxe cpupm_state_domains_t **dom_ptr; 4310e751525SEric Saxe cpupm_state_domains_t *dptr; 4320e751525SEric Saxe cpupm_state_domains_t **mach_dom_state_ptr; 4330e751525SEric Saxe uint32_t domain; 4340e751525SEric Saxe uint32_t type; 4350e751525SEric Saxe 4360e751525SEric Saxe switch (state) { 4370e751525SEric Saxe case CPUPM_P_STATES: 4380e751525SEric Saxe if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) { 4390e751525SEric Saxe domain = CPU_ACPI_PSD(handle).sd_domain; 4400e751525SEric Saxe type = CPU_ACPI_PSD(handle).sd_type; 4410e751525SEric Saxe } else { 442*a3114836SGerry Liu if (MUTEX_HELD(&cpu_lock)) { 443*a3114836SGerry Liu domain = cpuid_get_chipid(cp); 444*a3114836SGerry Liu } else { 4450e751525SEric Saxe mutex_enter(&cpu_lock); 4460e751525SEric Saxe domain = cpuid_get_chipid(cp); 4470e751525SEric Saxe mutex_exit(&cpu_lock); 448*a3114836SGerry Liu } 
4490e751525SEric Saxe type = CPU_ACPI_HW_ALL; 4500e751525SEric Saxe } 4510e751525SEric Saxe dom_ptr = &cpupm_pstate_domains; 4520e751525SEric Saxe mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain; 4530e751525SEric Saxe break; 4540e751525SEric Saxe case CPUPM_T_STATES: 4550e751525SEric Saxe if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) { 4560e751525SEric Saxe domain = CPU_ACPI_TSD(handle).sd_domain; 4570e751525SEric Saxe type = CPU_ACPI_TSD(handle).sd_type; 4580e751525SEric Saxe } else { 459*a3114836SGerry Liu if (MUTEX_HELD(&cpu_lock)) { 460*a3114836SGerry Liu domain = cpuid_get_chipid(cp); 461*a3114836SGerry Liu } else { 4620e751525SEric Saxe mutex_enter(&cpu_lock); 4630e751525SEric Saxe domain = cpuid_get_chipid(cp); 4640e751525SEric Saxe mutex_exit(&cpu_lock); 465*a3114836SGerry Liu } 4660e751525SEric Saxe type = CPU_ACPI_HW_ALL; 4670e751525SEric Saxe } 4680e751525SEric Saxe dom_ptr = &cpupm_tstate_domains; 4690e751525SEric Saxe mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain; 4700e751525SEric Saxe break; 4710e751525SEric Saxe case CPUPM_C_STATES: 4720e751525SEric Saxe if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) { 4730e751525SEric Saxe domain = CPU_ACPI_CSD(handle).sd_domain; 4740e751525SEric Saxe type = CPU_ACPI_CSD(handle).sd_type; 4750e751525SEric Saxe } else { 476*a3114836SGerry Liu if (MUTEX_HELD(&cpu_lock)) { 477*a3114836SGerry Liu domain = cpuid_get_coreid(cp); 478*a3114836SGerry Liu } else { 4790e751525SEric Saxe mutex_enter(&cpu_lock); 4800e751525SEric Saxe domain = cpuid_get_coreid(cp); 4810e751525SEric Saxe mutex_exit(&cpu_lock); 482*a3114836SGerry Liu } 4830e751525SEric Saxe type = CPU_ACPI_HW_ALL; 4840e751525SEric Saxe } 4850e751525SEric Saxe dom_ptr = &cpupm_cstate_domains; 4860e751525SEric Saxe mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain; 4870e751525SEric Saxe break; 4880e751525SEric Saxe default: 4890e751525SEric Saxe return; 4900e751525SEric Saxe } 4910e751525SEric Saxe 4920e751525SEric Saxe for 
(dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 4930e751525SEric Saxe if (dptr->pm_domain == domain) 4940e751525SEric Saxe break; 4950e751525SEric Saxe } 4960e751525SEric Saxe 4970e751525SEric Saxe /* new domain is created and linked at the head */ 4980e751525SEric Saxe if (dptr == NULL) { 4990e751525SEric Saxe dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP); 5000e751525SEric Saxe dptr->pm_domain = domain; 5010e751525SEric Saxe dptr->pm_type = type; 5020e751525SEric Saxe dptr->pm_next = *dom_ptr; 5030e751525SEric Saxe mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN, 5040e751525SEric Saxe (void *)ipltospl(DISP_LEVEL)); 5050e751525SEric Saxe CPUSET_ZERO(dptr->pm_cpus); 5060e751525SEric Saxe *dom_ptr = dptr; 5070e751525SEric Saxe } 5080e751525SEric Saxe CPUSET_ADD(dptr->pm_cpus, cp->cpu_id); 5090e751525SEric Saxe *mach_dom_state_ptr = dptr; 5100e751525SEric Saxe } 5110e751525SEric Saxe 5120e751525SEric Saxe /* 5130e751525SEric Saxe * Free C, P or T state power domains 5140e751525SEric Saxe */ 5150e751525SEric Saxe void 5160e751525SEric Saxe cpupm_free_domains(cpupm_state_domains_t **dom_ptr) 5170e751525SEric Saxe { 5180e751525SEric Saxe cpupm_state_domains_t *this_domain, *next_domain; 5190e751525SEric Saxe 5200e751525SEric Saxe this_domain = *dom_ptr; 5210e751525SEric Saxe while (this_domain != NULL) { 5220e751525SEric Saxe next_domain = this_domain->pm_next; 5230e751525SEric Saxe mutex_destroy(&this_domain->pm_lock); 5240e751525SEric Saxe kmem_free((void *)this_domain, 5250e751525SEric Saxe sizeof (cpupm_state_domains_t)); 5260e751525SEric Saxe this_domain = next_domain; 5270e751525SEric Saxe } 5280e751525SEric Saxe *dom_ptr = NULL; 5290e751525SEric Saxe } 5300e751525SEric Saxe 531444f66e7SMark Haywood /* 532444f66e7SMark Haywood * Remove CPU from C, P or T state power domains 533444f66e7SMark Haywood */ 534444f66e7SMark Haywood void 535444f66e7SMark Haywood cpupm_remove_domains(cpu_t *cp, int state, cpupm_state_domains_t **dom_ptr) 536444f66e7SMark 
Haywood { 537444f66e7SMark Haywood cpupm_mach_state_t *mach_state = 538444f66e7SMark Haywood (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 539444f66e7SMark Haywood cpupm_state_domains_t *dptr; 540444f66e7SMark Haywood uint32_t pm_domain; 541444f66e7SMark Haywood 542444f66e7SMark Haywood ASSERT(mach_state); 543444f66e7SMark Haywood 544444f66e7SMark Haywood switch (state) { 545444f66e7SMark Haywood case CPUPM_P_STATES: 546444f66e7SMark Haywood pm_domain = mach_state->ms_pstate.cma_domain->pm_domain; 547444f66e7SMark Haywood break; 548444f66e7SMark Haywood case CPUPM_T_STATES: 549444f66e7SMark Haywood pm_domain = mach_state->ms_tstate.cma_domain->pm_domain; 550444f66e7SMark Haywood break; 551444f66e7SMark Haywood case CPUPM_C_STATES: 552444f66e7SMark Haywood pm_domain = mach_state->ms_cstate.cma_domain->pm_domain; 553444f66e7SMark Haywood break; 554444f66e7SMark Haywood default: 555444f66e7SMark Haywood return; 556444f66e7SMark Haywood } 557444f66e7SMark Haywood 558444f66e7SMark Haywood /* 559444f66e7SMark Haywood * Find the CPU C, P or T state power domain 560444f66e7SMark Haywood */ 561444f66e7SMark Haywood for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) { 562444f66e7SMark Haywood if (dptr->pm_domain == pm_domain) 563444f66e7SMark Haywood break; 564444f66e7SMark Haywood } 565444f66e7SMark Haywood 566444f66e7SMark Haywood /* 567444f66e7SMark Haywood * return if no matched domain found 568444f66e7SMark Haywood */ 569444f66e7SMark Haywood if (dptr == NULL) 570444f66e7SMark Haywood return; 571444f66e7SMark Haywood 572444f66e7SMark Haywood /* 573444f66e7SMark Haywood * We found one matched power domain, remove CPU from its cpuset. 5746af9d452Saubrey.li@intel.com * pm_lock(spin lock) here to avoid the race conditions between 575444f66e7SMark Haywood * event change notification and cpu remove. 
576444f66e7SMark Haywood */ 577444f66e7SMark Haywood mutex_enter(&dptr->pm_lock); 578444f66e7SMark Haywood if (CPU_IN_SET(dptr->pm_cpus, cp->cpu_id)) 579444f66e7SMark Haywood CPUSET_DEL(dptr->pm_cpus, cp->cpu_id); 580444f66e7SMark Haywood mutex_exit(&dptr->pm_lock); 581444f66e7SMark Haywood } 582444f66e7SMark Haywood 5830e751525SEric Saxe void 5840e751525SEric Saxe cpupm_alloc_ms_cstate(cpu_t *cp) 5850e751525SEric Saxe { 5860e751525SEric Saxe cpupm_mach_state_t *mach_state; 5870e751525SEric Saxe cpupm_mach_acpi_state_t *ms_cstate; 5880e751525SEric Saxe 5890e751525SEric Saxe mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 5900e751525SEric Saxe ms_cstate = &mach_state->ms_cstate; 5910e751525SEric Saxe ASSERT(ms_cstate->cma_state.cstate == NULL); 5920e751525SEric Saxe ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t), 5930e751525SEric Saxe KM_SLEEP); 5940e751525SEric Saxe ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1; 5950e751525SEric Saxe } 5960e751525SEric Saxe 5970e751525SEric Saxe void 5980e751525SEric Saxe cpupm_free_ms_cstate(cpu_t *cp) 5990e751525SEric Saxe { 6000e751525SEric Saxe cpupm_mach_state_t *mach_state = 6010e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 6020e751525SEric Saxe cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate; 6030e751525SEric Saxe 6040e751525SEric Saxe if (ms_cstate->cma_state.cstate != NULL) { 6050e751525SEric Saxe kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t)); 6060e751525SEric Saxe ms_cstate->cma_state.cstate = NULL; 6070e751525SEric Saxe } 6080e751525SEric Saxe } 6090e751525SEric Saxe 6100e751525SEric Saxe void 6110e751525SEric Saxe cpupm_state_change(cpu_t *cp, int level, int state) 6120e751525SEric Saxe { 6130e751525SEric Saxe cpupm_mach_state_t *mach_state = 6140e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 6150e751525SEric Saxe cpupm_state_ops_t *state_ops; 6160e751525SEric Saxe cpupm_state_domains_t 
*state_domain; 6170e751525SEric Saxe cpuset_t set; 6180e751525SEric Saxe 6190e751525SEric Saxe DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level); 6200e751525SEric Saxe 6210e751525SEric Saxe if (mach_state == NULL) { 6220e751525SEric Saxe return; 6230e751525SEric Saxe } 6240e751525SEric Saxe 6250e751525SEric Saxe switch (state) { 6260e751525SEric Saxe case CPUPM_P_STATES: 6270e751525SEric Saxe state_ops = mach_state->ms_pstate.cma_ops; 6280e751525SEric Saxe state_domain = mach_state->ms_pstate.cma_domain; 6290e751525SEric Saxe break; 6300e751525SEric Saxe case CPUPM_T_STATES: 6310e751525SEric Saxe state_ops = mach_state->ms_tstate.cma_ops; 6320e751525SEric Saxe state_domain = mach_state->ms_tstate.cma_domain; 6330e751525SEric Saxe break; 6340e751525SEric Saxe default: 6350e751525SEric Saxe break; 6360e751525SEric Saxe } 6370e751525SEric Saxe 6380e751525SEric Saxe switch (state_domain->pm_type) { 6390e751525SEric Saxe case CPU_ACPI_SW_ANY: 6400e751525SEric Saxe /* 6410e751525SEric Saxe * A request on any CPU in the domain transitions the domain 6420e751525SEric Saxe */ 6430e751525SEric Saxe CPUSET_ONLY(set, cp->cpu_id); 6440e751525SEric Saxe state_ops->cpus_change(set, level); 6450e751525SEric Saxe break; 6460e751525SEric Saxe case CPU_ACPI_SW_ALL: 6470e751525SEric Saxe /* 6480e751525SEric Saxe * All CPUs in the domain must request the transition 6490e751525SEric Saxe */ 6500e751525SEric Saxe case CPU_ACPI_HW_ALL: 6510e751525SEric Saxe /* 6520e751525SEric Saxe * P/T-state transitions are coordinated by the hardware 6530e751525SEric Saxe * For now, request the transition on all CPUs in the domain, 6540e751525SEric Saxe * but looking ahead we can probably be smarter about this. 
6550e751525SEric Saxe */ 6560e751525SEric Saxe mutex_enter(&state_domain->pm_lock); 6570e751525SEric Saxe state_ops->cpus_change(state_domain->pm_cpus, level); 6580e751525SEric Saxe mutex_exit(&state_domain->pm_lock); 6590e751525SEric Saxe break; 6600e751525SEric Saxe default: 66100f97612SMark Haywood cmn_err(CE_NOTE, "Unknown domain coordination type: %d", 6620e751525SEric Saxe state_domain->pm_type); 6630e751525SEric Saxe } 6640e751525SEric Saxe } 6650e751525SEric Saxe 6660e751525SEric Saxe /* 6670e751525SEric Saxe * CPU PM interfaces exposed to the CPU power manager 6680e751525SEric Saxe */ 6690e751525SEric Saxe /*ARGSUSED*/ 6700e751525SEric Saxe id_t 6710e751525SEric Saxe cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type) 6720e751525SEric Saxe { 6730e751525SEric Saxe cpupm_mach_state_t *mach_state = 6740e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 6750e751525SEric Saxe 6760e751525SEric Saxe if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) && 6770e751525SEric Saxe !cpupm_is_enabled(CPUPM_C_STATES))) { 6780e751525SEric Saxe return (CPUPM_NO_DOMAIN); 6790e751525SEric Saxe } 6800e751525SEric Saxe if (type == CPUPM_DTYPE_ACTIVE) { 6810e751525SEric Saxe /* 6820e751525SEric Saxe * Return P-State domain for the specified CPU 6830e751525SEric Saxe */ 6840e751525SEric Saxe if (mach_state->ms_pstate.cma_domain) { 6850e751525SEric Saxe return (mach_state->ms_pstate.cma_domain->pm_domain); 6860e751525SEric Saxe } 6870e751525SEric Saxe } else if (type == CPUPM_DTYPE_IDLE) { 6880e751525SEric Saxe /* 6890e751525SEric Saxe * Return C-State domain for the specified CPU 6900e751525SEric Saxe */ 6910e751525SEric Saxe if (mach_state->ms_cstate.cma_domain) { 6920e751525SEric Saxe return (mach_state->ms_cstate.cma_domain->pm_domain); 6930e751525SEric Saxe } 6940e751525SEric Saxe } 6950e751525SEric Saxe return (CPUPM_NO_DOMAIN); 6960e751525SEric Saxe } 6970e751525SEric Saxe 6980e751525SEric Saxe /*ARGSUSED*/ 6990e751525SEric Saxe uint_t 
7000e751525SEric Saxe cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type, 7010e751525SEric Saxe cpupm_state_t *states) 7020e751525SEric Saxe { 7030e751525SEric Saxe int *speeds; 7040e751525SEric Saxe uint_t nspeeds, i; 7050e751525SEric Saxe 7060e751525SEric Saxe /* 7070e751525SEric Saxe * Idle domain support unimplemented 7080e751525SEric Saxe */ 7090e751525SEric Saxe if (type != CPUPM_DTYPE_ACTIVE) { 7100e751525SEric Saxe return (0); 7110e751525SEric Saxe } 7120e751525SEric Saxe nspeeds = cpupm_get_speeds(cp, &speeds); 7130e751525SEric Saxe 7140e751525SEric Saxe /* 7150e751525SEric Saxe * If the caller passes NULL for states, just return the 7160e751525SEric Saxe * number of states. 7170e751525SEric Saxe */ 7180e751525SEric Saxe if (states != NULL) { 7190e751525SEric Saxe for (i = 0; i < nspeeds; i++) { 7200e751525SEric Saxe states[i].cps_speed = speeds[i]; 7210e751525SEric Saxe states[i].cps_handle = (cpupm_handle_t)i; 7220e751525SEric Saxe } 7230e751525SEric Saxe } 7240e751525SEric Saxe cpupm_free_speeds(speeds, nspeeds); 7250e751525SEric Saxe return (nspeeds); 7260e751525SEric Saxe } 7270e751525SEric Saxe 7280e751525SEric Saxe /*ARGSUSED*/ 7290e751525SEric Saxe int 7300e751525SEric Saxe cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state) 7310e751525SEric Saxe { 732444f66e7SMark Haywood if (!cpupm_is_ready(cp)) 7330e751525SEric Saxe return (-1); 7340e751525SEric Saxe 7350e751525SEric Saxe cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES); 7360e751525SEric Saxe 7370e751525SEric Saxe return (0); 7380e751525SEric Saxe } 7390e751525SEric Saxe 7400e751525SEric Saxe /*ARGSUSED*/ 7410e751525SEric Saxe /* 7420e751525SEric Saxe * Note: It is the responsibility of the users of 7430e751525SEric Saxe * cpupm_get_speeds() to free the memory allocated 7440e751525SEric Saxe * for speeds using cpupm_free_speeds() 7450e751525SEric Saxe */ 7460e751525SEric Saxe uint_t 7470e751525SEric Saxe cpupm_get_speeds(cpu_t *cp, int **speeds) 7480e751525SEric Saxe { 
7490e751525SEric Saxe #ifndef __xpv 7500e751525SEric Saxe cpupm_mach_state_t *mach_state = 7510e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 7520e751525SEric Saxe return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds)); 7530e751525SEric Saxe #else 7540e751525SEric Saxe return (0); 7550e751525SEric Saxe #endif 7560e751525SEric Saxe } 7570e751525SEric Saxe 7580e751525SEric Saxe /*ARGSUSED*/ 7590e751525SEric Saxe void 7600e751525SEric Saxe cpupm_free_speeds(int *speeds, uint_t nspeeds) 7610e751525SEric Saxe { 7620e751525SEric Saxe #ifndef __xpv 7630e751525SEric Saxe cpu_acpi_free_speeds(speeds, nspeeds); 7640e751525SEric Saxe #endif 7650e751525SEric Saxe } 7660e751525SEric Saxe 7670e751525SEric Saxe /* 7680e751525SEric Saxe * All CPU instances have been initialized successfully. 7690e751525SEric Saxe */ 7700e751525SEric Saxe boolean_t 771444f66e7SMark Haywood cpupm_power_ready(cpu_t *cp) 7720e751525SEric Saxe { 773444f66e7SMark Haywood return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready(cp)); 7740e751525SEric Saxe } 7750e751525SEric Saxe 7760e751525SEric Saxe /* 7770e751525SEric Saxe * All CPU instances have been initialized successfully. 7780e751525SEric Saxe */ 7790e751525SEric Saxe boolean_t 780444f66e7SMark Haywood cpupm_throttle_ready(cpu_t *cp) 7810e751525SEric Saxe { 782444f66e7SMark Haywood return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready(cp)); 7830e751525SEric Saxe } 7840e751525SEric Saxe 7850e751525SEric Saxe /* 7860e751525SEric Saxe * All CPU instances have been initialized successfully. 
7870e751525SEric Saxe */ 7880e751525SEric Saxe boolean_t 789444f66e7SMark Haywood cpupm_cstate_ready(cpu_t *cp) 7900e751525SEric Saxe { 791444f66e7SMark Haywood return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready(cp)); 7920e751525SEric Saxe } 7930e751525SEric Saxe 7940e751525SEric Saxe void 7950e751525SEric Saxe cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 7960e751525SEric Saxe { 7970e751525SEric Saxe cpu_t *cp = ctx; 7980e751525SEric Saxe cpupm_mach_state_t *mach_state = 7990e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 8000e751525SEric Saxe cpupm_notification_t *entry; 8010e751525SEric Saxe 8020e751525SEric Saxe mutex_enter(&mach_state->ms_lock); 8030e751525SEric Saxe for (entry = mach_state->ms_handlers; entry != NULL; 8040e751525SEric Saxe entry = entry->nq_next) { 8050e751525SEric Saxe entry->nq_handler(obj, val, entry->nq_ctx); 8060e751525SEric Saxe } 8070e751525SEric Saxe mutex_exit(&mach_state->ms_lock); 8080e751525SEric Saxe } 8090e751525SEric Saxe 8100e751525SEric Saxe /*ARGSUSED*/ 8110e751525SEric Saxe void 8120e751525SEric Saxe cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx) 8130e751525SEric Saxe { 8140e751525SEric Saxe #ifndef __xpv 8150e751525SEric Saxe cpupm_mach_state_t *mach_state = 8160e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 8170e751525SEric Saxe cpupm_notification_t *entry; 8180e751525SEric Saxe 8190e751525SEric Saxe entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP); 8200e751525SEric Saxe entry->nq_handler = handler; 8210e751525SEric Saxe entry->nq_ctx = ctx; 8220e751525SEric Saxe mutex_enter(&mach_state->ms_lock); 8230e751525SEric Saxe if (mach_state->ms_handlers == NULL) { 8240e751525SEric Saxe entry->nq_next = NULL; 8250e751525SEric Saxe mach_state->ms_handlers = entry; 8260e751525SEric Saxe cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle, 8270e751525SEric Saxe cpupm_notify_handler, cp); 8280e751525SEric Saxe 
8290e751525SEric Saxe } else { 8300e751525SEric Saxe entry->nq_next = mach_state->ms_handlers; 8310e751525SEric Saxe mach_state->ms_handlers = entry; 8320e751525SEric Saxe } 8330e751525SEric Saxe mutex_exit(&mach_state->ms_lock); 8340e751525SEric Saxe #endif 8350e751525SEric Saxe } 8360e751525SEric Saxe 8370e751525SEric Saxe /*ARGSUSED*/ 8380e751525SEric Saxe static void 8390e751525SEric Saxe cpupm_free_notify_handlers(cpu_t *cp) 8400e751525SEric Saxe { 8410e751525SEric Saxe #ifndef __xpv 8420e751525SEric Saxe cpupm_mach_state_t *mach_state = 8430e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 8440e751525SEric Saxe cpupm_notification_t *entry; 8450e751525SEric Saxe cpupm_notification_t *next; 8460e751525SEric Saxe 8470e751525SEric Saxe mutex_enter(&mach_state->ms_lock); 8480e751525SEric Saxe if (mach_state->ms_handlers == NULL) { 8490e751525SEric Saxe mutex_exit(&mach_state->ms_lock); 8500e751525SEric Saxe return; 8510e751525SEric Saxe } 8520e751525SEric Saxe if (mach_state->ms_acpi_handle != NULL) { 8530e751525SEric Saxe cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle, 8540e751525SEric Saxe cpupm_notify_handler); 8550e751525SEric Saxe } 8560e751525SEric Saxe entry = mach_state->ms_handlers; 8570e751525SEric Saxe while (entry != NULL) { 8580e751525SEric Saxe next = entry->nq_next; 8590e751525SEric Saxe kmem_free(entry, sizeof (cpupm_notification_t)); 8600e751525SEric Saxe entry = next; 8610e751525SEric Saxe } 8620e751525SEric Saxe mach_state->ms_handlers = NULL; 8630e751525SEric Saxe mutex_exit(&mach_state->ms_lock); 8640e751525SEric Saxe #endif 8650e751525SEric Saxe } 8660e751525SEric Saxe 8670e751525SEric Saxe /* 8680e751525SEric Saxe * Get the current max speed from the ACPI _PPC object 8690e751525SEric Saxe */ 8700e751525SEric Saxe /*ARGSUSED*/ 8710e751525SEric Saxe int 8720e751525SEric Saxe cpupm_get_top_speed(cpu_t *cp) 8730e751525SEric Saxe { 8740e751525SEric Saxe #ifndef __xpv 8750e751525SEric Saxe cpupm_mach_state_t 
*mach_state; 8760e751525SEric Saxe cpu_acpi_handle_t handle; 8770e751525SEric Saxe int plat_level; 8780e751525SEric Saxe uint_t nspeeds; 8790e751525SEric Saxe int max_level; 8800e751525SEric Saxe 8810e751525SEric Saxe mach_state = 8820e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 8830e751525SEric Saxe handle = mach_state->ms_acpi_handle; 8840e751525SEric Saxe 8850e751525SEric Saxe cpu_acpi_cache_ppc(handle); 8860e751525SEric Saxe plat_level = CPU_ACPI_PPC(handle); 8870e751525SEric Saxe 8880e751525SEric Saxe nspeeds = CPU_ACPI_PSTATES_COUNT(handle); 8890e751525SEric Saxe 8900e751525SEric Saxe max_level = nspeeds - 1; 8910e751525SEric Saxe if ((plat_level < 0) || (plat_level > max_level)) { 8920e751525SEric Saxe cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: " 8930e751525SEric Saxe "_PPC out of range %d", cp->cpu_id, plat_level); 8940e751525SEric Saxe plat_level = 0; 8950e751525SEric Saxe } 8960e751525SEric Saxe 8970e751525SEric Saxe return (plat_level); 8980e751525SEric Saxe #else 8990e751525SEric Saxe return (0); 9000e751525SEric Saxe #endif 9010e751525SEric Saxe } 9020e751525SEric Saxe 9030e751525SEric Saxe /* 9040e751525SEric Saxe * This notification handler is called whenever the ACPI _PPC 9050e751525SEric Saxe * object changes. The _PPC is a sort of governor on power levels. 9060e751525SEric Saxe * It sets an upper threshold on which, _PSS defined, power levels 9070e751525SEric Saxe * are usuable. The _PPC value is dynamic and may change as properties 9080e751525SEric Saxe * (i.e., thermal or AC source) of the system change. 
9090e751525SEric Saxe */ 9100e751525SEric Saxe 9110e751525SEric Saxe static void 9120e751525SEric Saxe cpupm_power_manage_notifications(void *ctx) 9130e751525SEric Saxe { 9140e751525SEric Saxe cpu_t *cp = ctx; 9150e751525SEric Saxe int top_speed; 9160e751525SEric Saxe 9170e751525SEric Saxe top_speed = cpupm_get_top_speed(cp); 9180e751525SEric Saxe cpupm_redefine_max_activepwr_state(cp, top_speed); 9190e751525SEric Saxe } 9200e751525SEric Saxe 9210e751525SEric Saxe /* ARGSUSED */ 9220e751525SEric Saxe static void 9230e751525SEric Saxe cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx) 9240e751525SEric Saxe { 9250e751525SEric Saxe #ifndef __xpv 926d218c8f0SMark Haywood 927d218c8f0SMark Haywood cpu_t *cp = ctx; 928d218c8f0SMark Haywood cpupm_mach_state_t *mach_state = 929d218c8f0SMark Haywood (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 930d218c8f0SMark Haywood 931d218c8f0SMark Haywood if (mach_state == NULL) 932d218c8f0SMark Haywood return; 933d218c8f0SMark Haywood 9340e751525SEric Saxe /* 9350e751525SEric Saxe * Currently, we handle _TPC,_CST and _PPC change notifications. 9360e751525SEric Saxe */ 937d218c8f0SMark Haywood if (val == CPUPM_TPC_CHANGE_NOTIFICATION && 938d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_T_STATES) { 9390e751525SEric Saxe cpupm_throttle_manage_notification(ctx); 940d218c8f0SMark Haywood } else if (val == CPUPM_CST_CHANGE_NOTIFICATION && 941d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_C_STATES) { 9420e751525SEric Saxe cpuidle_manage_cstates(ctx); 943d218c8f0SMark Haywood } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION && 944d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_P_STATES) { 9450e751525SEric Saxe cpupm_power_manage_notifications(ctx); 9460e751525SEric Saxe } 9470e751525SEric Saxe #endif 9480e751525SEric Saxe } 9490e751525SEric Saxe 9500e751525SEric Saxe /* 9510e751525SEric Saxe * Update cpupm cstate data each time CPU exits idle. 
 */
void
cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end)
{
	/* Record the time this CPU left idle; used on the next entry. */
	cs_data->cs_idle_exit = end;
}

/*
 * Determine next cstate based on cpupm data.
 * Update cpupm cstate data each time CPU goes idle.
 * Do as much as possible in the idle state bookkeeping function because the
 * performance impact while idle is minimal compared to in the wakeup function
 * when there is real work to do.
 */
uint32_t
cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates,
    uint32_t cs_count, hrtime_t start)
{
	hrtime_t duration;
	hrtime_t ave_interval;
	hrtime_t ave_idle_time;
	uint32_t i, smpl_cnt;

	/*
	 * Accumulate the idle time of the interval that just ended
	 * (cs_idle_exit was stamped by cpupm_wakeup_cstate_data()),
	 * and mark the start of the new idle period.
	 */
	duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter;
	scalehrtime(&duration);
	cs_data->cs_idle += duration;
	cs_data->cs_idle_enter = start;

	/* One more idle entry in the current sample window. */
	smpl_cnt = ++cs_data->cs_cnt;
	cs_data->cs_smpl_len = start - cs_data->cs_smpl_start;
	scalehrtime(&cs_data->cs_smpl_len);
	/*
	 * Only re-evaluate the policy once per sample interval; between
	 * evaluations the previously chosen cs_next_cstate is reused.
	 */
	if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) {
		cs_data->cs_smpl_idle = cs_data->cs_idle;
		cs_data->cs_idle = 0;
		cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) /
		    cs_data->cs_smpl_len);

		cs_data->cs_smpl_start = start;
		cs_data->cs_cnt = 0;

		/*
		 * Strand level C-state policy
		 * The cpu_acpi_cstate_t *cstates array is not required to
		 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3.
		 * There are cs_count entries in the cstates array.
		 * cs_data->cs_next_cstate contains the index of the next
		 * C-state this CPU should enter.
		 *
		 * Each check below may lower cs_count, which both truncates
		 * the candidate list and terminates its own loop early; the
		 * final choice is the deepest state that survives all three
		 * filters.
		 */
		ASSERT(cstates[0].cs_type == CPU_ACPI_C1);

		/*
		 * Will CPU be idle long enough to save power?
		 */
		ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_idle_time < (cstates[i].cs_latency *
			    cpupm_cs_idle_save_tunable)) {
				/* Too shallow a sleep to amortize latency. */
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, i);
			}
		}

		/*
		 * Wakeup often (even when non-idle time is very short)?
		 * Some producer/consumer type loads fall into this category.
		 */
		ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_interval <= (cstates[i].cs_latency *
			    cpupm_cs_idle_cost_tunable)) {
				/* Wakeups too frequent for this latency. */
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, (CPU_MAX_CSTATES + i));
			}
		}

		/*
		 * Idle percent
		 */
		for (i = 1; i < cs_count; ++i) {
			switch (cstates[i].cs_type) {
			case CPU_ACPI_C2:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C2_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;

			case CPU_ACPI_C3:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C3_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;
			}
		}

		/* Deepest surviving candidate becomes the next C-state. */
		cs_data->cs_next_cstate = cs_count - 1;
	}

	return (cs_data->cs_next_cstate);
}