10e751525SEric Saxe /*
20e751525SEric Saxe * CDDL HEADER START
30e751525SEric Saxe *
40e751525SEric Saxe * The contents of this file are subject to the terms of the
50e751525SEric Saxe * Common Development and Distribution License (the "License").
60e751525SEric Saxe * You may not use this file except in compliance with the License.
70e751525SEric Saxe *
80e751525SEric Saxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90e751525SEric Saxe * or http://www.opensolaris.org/os/licensing.
100e751525SEric Saxe * See the License for the specific language governing permissions
110e751525SEric Saxe * and limitations under the License.
120e751525SEric Saxe *
130e751525SEric Saxe * When distributing Covered Code, include this CDDL HEADER in each
140e751525SEric Saxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150e751525SEric Saxe * If applicable, add the following below this CDDL HEADER, with the
160e751525SEric Saxe * fields enclosed by brackets "[]" replaced with your own identifying
170e751525SEric Saxe * information: Portions Copyright [yyyy] [name of copyright owner]
180e751525SEric Saxe *
190e751525SEric Saxe * CDDL HEADER END
200e751525SEric Saxe */
210e751525SEric Saxe /*
220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
230e751525SEric Saxe * Use is subject to license terms.
240e751525SEric Saxe */
25cef70d2cSBill Holler /*
26cef70d2cSBill Holler * Copyright (c) 2009, Intel Corporation.
27cef70d2cSBill Holler * All rights reserved.
28cef70d2cSBill Holler */
290e751525SEric Saxe
300e751525SEric Saxe #include <sys/cpu_pm.h>
310e751525SEric Saxe #include <sys/x86_archext.h>
320e751525SEric Saxe #include <sys/sdt.h>
330e751525SEric Saxe #include <sys/spl.h>
340e751525SEric Saxe #include <sys/machsystm.h>
35444f66e7SMark Haywood #include <sys/archsystm.h>
360e751525SEric Saxe #include <sys/hpet.h>
3778d5422cSMark Haywood #include <sys/acpi/acpi.h>
3878d5422cSMark Haywood #include <sys/acpica.h>
390e751525SEric Saxe #include <sys/cpupm.h>
400e751525SEric Saxe #include <sys/cpu_idle.h>
410e751525SEric Saxe #include <sys/cpu_acpi.h>
420e751525SEric Saxe #include <sys/cpupm_throttle.h>
439aa01d98SBill Holler #include <sys/dtrace.h>
44444f66e7SMark Haywood #include <sys/note.h>
450e751525SEric Saxe
460e751525SEric Saxe /*
470e751525SEric Saxe * This callback is used to build the PPM CPU domains once
48444f66e7SMark Haywood * a CPU device has been started. The callback is initialized
49444f66e7SMark Haywood * by the PPM driver to point to a routine that will build the
50444f66e7SMark Haywood * domains.
510e751525SEric Saxe */
52444f66e7SMark Haywood void (*cpupm_ppm_alloc_pstate_domains)(cpu_t *);
530e751525SEric Saxe
540e751525SEric Saxe /*
55444f66e7SMark Haywood * This callback is used to remove CPU from the PPM CPU domains
56444f66e7SMark Haywood * when the cpu driver is detached. The callback is initialized
57444f66e7SMark Haywood * by the PPM driver to point to a routine that will remove CPU
58444f66e7SMark Haywood * from the domains.
590e751525SEric Saxe */
60444f66e7SMark Haywood void (*cpupm_ppm_free_pstate_domains)(cpu_t *);
610e751525SEric Saxe
620e751525SEric Saxe /*
630e751525SEric Saxe * This callback is used to redefine the topspeed for a CPU device.
640e751525SEric Saxe * Since all CPUs in a domain should have identical properties, this
650e751525SEric Saxe * callback is initialized by the PPM driver to point to a routine
660e751525SEric Saxe * that will redefine the topspeed for all devices in a CPU domain.
670e751525SEric Saxe * This callback is exercised whenever an ACPI _PPC change notification
680e751525SEric Saxe * is received by the CPU driver.
690e751525SEric Saxe */
700e751525SEric Saxe void (*cpupm_redefine_topspeed)(void *);
710e751525SEric Saxe
720e751525SEric Saxe /*
730e751525SEric Saxe * This callback is used by the PPM driver to call into the CPU driver
740e751525SEric Saxe * to set a new topspeed for a CPU.
750e751525SEric Saxe */
760e751525SEric Saxe void (*cpupm_set_topspeed_callb)(void *, int);
770e751525SEric Saxe
780e751525SEric Saxe /*
790e751525SEric Saxe * This callback is used by the PPM driver to call into the CPU driver
800e751525SEric Saxe * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
810e751525SEric Saxe */
820e751525SEric Saxe int (*cpupm_get_topspeed_callb)(void *);
830e751525SEric Saxe
840e751525SEric Saxe static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
850e751525SEric Saxe static void cpupm_free_notify_handlers(cpu_t *);
8641333a9eSMark Haywood static void cpupm_power_manage_notifications(void *);
870e751525SEric Saxe
880e751525SEric Saxe /*
890e751525SEric Saxe * Until proven otherwise, all power states are manageable.
900e751525SEric Saxe */
910e751525SEric Saxe static uint32_t cpupm_enabled = CPUPM_ALL_STATES;
920e751525SEric Saxe
930e751525SEric Saxe cpupm_state_domains_t *cpupm_pstate_domains = NULL;
940e751525SEric Saxe cpupm_state_domains_t *cpupm_tstate_domains = NULL;
950e751525SEric Saxe cpupm_state_domains_t *cpupm_cstate_domains = NULL;
960e751525SEric Saxe
970e751525SEric Saxe /*
980e751525SEric Saxe * c-state tunables
990e751525SEric Saxe *
1000fc6188aSaubrey.li@intel.com * cpupm_cs_sample_interval is the length of time we wait before
1010fc6188aSaubrey.li@intel.com * recalculating c-state statistics. When a CPU goes idle it checks
1020fc6188aSaubrey.li@intel.com * to see if it has been longer than cpupm_cs_sample_interval since it last
1030fc6188aSaubrey.li@intel.com * calculated which C-state to go to.
1040fc6188aSaubrey.li@intel.com *
1050e751525SEric Saxe * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle
1060e751525SEric Saxe * divided by time spent in the idle state transitions.
1070e751525SEric Saxe * A value of 10 means the CPU will not spend more than 1/10 of its time
1080e751525SEric Saxe * in idle latency. The worst case performance will be 90% of non Deep C-state
1090e751525SEric Saxe * kernel.
1100e751525SEric Saxe *
1110e751525SEric Saxe * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
1120e751525SEric Saxe * before it is worth going there. Expressed as a multiple of latency.
1130e751525SEric Saxe */
1140fc6188aSaubrey.li@intel.com uint32_t cpupm_cs_sample_interval = 100*1000*1000; /* 100 milliseconds */
1150e751525SEric Saxe uint32_t cpupm_cs_idle_cost_tunable = 10; /* work time / latency cost */
1160e751525SEric Saxe uint32_t cpupm_cs_idle_save_tunable = 2; /* idle power savings */
1170e751525SEric Saxe uint16_t cpupm_C2_idle_pct_tunable = 70;
1180e751525SEric Saxe uint16_t cpupm_C3_idle_pct_tunable = 80;
1190e751525SEric Saxe
1200e751525SEric Saxe #ifndef __xpv
/*
 * Vendor-specific probe routines.  Each returns B_TRUE when it claims
 * power-management responsibility for the CPU being initialized
 * (filling in the cma_ops vectors in the CPU's mach_state).
 */
1210e751525SEric Saxe extern boolean_t cpupm_intel_init(cpu_t *);
1220e751525SEric Saxe extern boolean_t cpupm_amd_init(cpu_t *);
1230e751525SEric Saxe
1240e751525SEric Saxe typedef struct cpupm_vendor {
1250e751525SEric Saxe boolean_t (*cpuv_init)(cpu_t *);
1260e751525SEric Saxe } cpupm_vendor_t;
1270e751525SEric Saxe
1280e751525SEric Saxe /*
1290e751525SEric Saxe * Table of supported vendors.
 * NOTE: terminated by a NULL cpuv_init entry; cpupm_init() iterates
 * until it hits that sentinel.
1300e751525SEric Saxe */
1310e751525SEric Saxe static cpupm_vendor_t cpupm_vendors[] = {
1320e751525SEric Saxe cpupm_intel_init,
1330e751525SEric Saxe cpupm_amd_init,
1340e751525SEric Saxe NULL
1350e751525SEric Saxe };
1360e751525SEric Saxe #endif
1370e751525SEric Saxe
1380e751525SEric Saxe /*
1390e751525SEric Saxe * Initialize the machine.
1400e751525SEric Saxe * See if a module exists for managing power for this CPU.
1410e751525SEric Saxe */
1420e751525SEric Saxe /*ARGSUSED*/
1430e751525SEric Saxe void
cpupm_init(cpu_t * cp)1440e751525SEric Saxe cpupm_init(cpu_t *cp)
1450e751525SEric Saxe {
1460e751525SEric Saxe #ifndef __xpv
1470e751525SEric Saxe cpupm_vendor_t *vendors;
1480e751525SEric Saxe cpupm_mach_state_t *mach_state;
1490e751525SEric Saxe struct machcpu *mcpu = &(cp->cpu_m);
15078d5422cSMark Haywood static boolean_t first = B_TRUE;
1510e751525SEric Saxe int *speeds;
1520e751525SEric Saxe uint_t nspeeds;
1530e751525SEric Saxe int ret;
1540e751525SEric Saxe
1550e751525SEric Saxe mach_state = cp->cpu_m.mcpu_pm_mach_state =
1560e751525SEric Saxe kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP);
1570e751525SEric Saxe mach_state->ms_caps = CPUPM_NO_STATES;
1580e751525SEric Saxe mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL);
1590e751525SEric Saxe
1600e751525SEric Saxe mach_state->ms_acpi_handle = cpu_acpi_init(cp);
1610e751525SEric Saxe if (mach_state->ms_acpi_handle == NULL) {
162444f66e7SMark Haywood cpupm_fini(cp);
1630e751525SEric Saxe cmn_err(CE_WARN, "!cpupm_init: processor %d: "
1640e751525SEric Saxe "unable to get ACPI handle", cp->cpu_id);
1650e751525SEric Saxe cmn_err(CE_NOTE, "!CPU power management will not function.");
1660e751525SEric Saxe CPUPM_DISABLE();
16778d5422cSMark Haywood first = B_FALSE;
1680e751525SEric Saxe return;
1690e751525SEric Saxe }
1700e751525SEric Saxe
1710e751525SEric Saxe /*
1720e751525SEric Saxe * Loop through the CPU management module table and see if
1730e751525SEric Saxe * any of the modules implement CPU power management
1740e751525SEric Saxe * for this CPU.
1750e751525SEric Saxe */
1760e751525SEric Saxe for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) {
1770e751525SEric Saxe if (vendors->cpuv_init(cp))
1780e751525SEric Saxe break;
1790e751525SEric Saxe }
1800e751525SEric Saxe
1810e751525SEric Saxe /*
1820e751525SEric Saxe * Nope, we can't power manage this CPU.
1830e751525SEric Saxe */
1840e751525SEric Saxe if (vendors == NULL) {
185444f66e7SMark Haywood cpupm_fini(cp);
1860e751525SEric Saxe CPUPM_DISABLE();
18778d5422cSMark Haywood first = B_FALSE;
1880e751525SEric Saxe return;
1890e751525SEric Saxe }
1900e751525SEric Saxe
1910e751525SEric Saxe /*
1920e751525SEric Saxe * If P-state support exists for this system, then initialize it.
1930e751525SEric Saxe */
1940e751525SEric Saxe if (mach_state->ms_pstate.cma_ops != NULL) {
1950e751525SEric Saxe ret = mach_state->ms_pstate.cma_ops->cpus_init(cp);
1960e751525SEric Saxe if (ret != 0) {
1970e751525SEric Saxe mach_state->ms_pstate.cma_ops = NULL;
1980e751525SEric Saxe cpupm_disable(CPUPM_P_STATES);
1990e751525SEric Saxe } else {
2000e751525SEric Saxe nspeeds = cpupm_get_speeds(cp, &speeds);
2010e751525SEric Saxe if (nspeeds == 0) {
20200f97612SMark Haywood cmn_err(CE_NOTE, "!cpupm_init: processor %d:"
2030e751525SEric Saxe " no speeds to manage", cp->cpu_id);
2040e751525SEric Saxe } else {
2050e751525SEric Saxe cpupm_set_supp_freqs(cp, speeds, nspeeds);
2060e751525SEric Saxe cpupm_free_speeds(speeds, nspeeds);
2070e751525SEric Saxe mach_state->ms_caps |= CPUPM_P_STATES;
2080e751525SEric Saxe }
2090e751525SEric Saxe }
21029091f17SAnup Pemmaiah } else {
21129091f17SAnup Pemmaiah cpupm_disable(CPUPM_P_STATES);
2120e751525SEric Saxe }
2130e751525SEric Saxe
2140e751525SEric Saxe if (mach_state->ms_tstate.cma_ops != NULL) {
2150e751525SEric Saxe ret = mach_state->ms_tstate.cma_ops->cpus_init(cp);
2160e751525SEric Saxe if (ret != 0) {
2170e751525SEric Saxe mach_state->ms_tstate.cma_ops = NULL;
2180e751525SEric Saxe cpupm_disable(CPUPM_T_STATES);
2190e751525SEric Saxe } else {
2200e751525SEric Saxe mach_state->ms_caps |= CPUPM_T_STATES;
2210e751525SEric Saxe }
22229091f17SAnup Pemmaiah } else {
22329091f17SAnup Pemmaiah cpupm_disable(CPUPM_T_STATES);
2240e751525SEric Saxe }
2250e751525SEric Saxe
2260e751525SEric Saxe /*
2270e751525SEric Saxe * If C-states support exists for this system, then initialize it.
2280e751525SEric Saxe */
2290e751525SEric Saxe if (mach_state->ms_cstate.cma_ops != NULL) {
2300e751525SEric Saxe ret = mach_state->ms_cstate.cma_ops->cpus_init(cp);
2310e751525SEric Saxe if (ret != 0) {
2320e751525SEric Saxe mach_state->ms_cstate.cma_ops = NULL;
2330e751525SEric Saxe mcpu->max_cstates = CPU_ACPI_C1;
2340e751525SEric Saxe cpupm_disable(CPUPM_C_STATES);
2350e751525SEric Saxe idle_cpu = non_deep_idle_cpu;
2360e751525SEric Saxe disp_enq_thread = non_deep_idle_disp_enq_thread;
2370e751525SEric Saxe } else if (cpu_deep_cstates_supported()) {
2380e751525SEric Saxe mcpu->max_cstates = cpu_acpi_get_max_cstates(
2390e751525SEric Saxe mach_state->ms_acpi_handle);
2400e751525SEric Saxe if (mcpu->max_cstates > CPU_ACPI_C1) {
241cef70d2cSBill Holler (void) cstate_timer_callback(
242cef70d2cSBill Holler CST_EVENT_MULTIPLE_CSTATES);
243*a3114836SGerry Liu cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
2440e751525SEric Saxe mcpu->mcpu_idle_type = CPU_ACPI_C1;
2450e751525SEric Saxe disp_enq_thread = cstate_wakeup;
2460e751525SEric Saxe } else {
247cef70d2cSBill Holler (void) cstate_timer_callback(
248cef70d2cSBill Holler CST_EVENT_ONE_CSTATE);
2490e751525SEric Saxe }
2500e751525SEric Saxe mach_state->ms_caps |= CPUPM_C_STATES;
2510e751525SEric Saxe } else {
2520e751525SEric Saxe mcpu->max_cstates = CPU_ACPI_C1;
2530e751525SEric Saxe idle_cpu = non_deep_idle_cpu;
2540e751525SEric Saxe disp_enq_thread = non_deep_idle_disp_enq_thread;
2550e751525SEric Saxe }
25629091f17SAnup Pemmaiah } else {
25729091f17SAnup Pemmaiah cpupm_disable(CPUPM_C_STATES);
2580e751525SEric Saxe }
2590e751525SEric Saxe
2600e751525SEric Saxe
2610e751525SEric Saxe if (mach_state->ms_caps == CPUPM_NO_STATES) {
262444f66e7SMark Haywood cpupm_fini(cp);
2630e751525SEric Saxe CPUPM_DISABLE();
26478d5422cSMark Haywood first = B_FALSE;
2650e751525SEric Saxe return;
2660e751525SEric Saxe }
2670e751525SEric Saxe
2680e751525SEric Saxe if ((mach_state->ms_caps & CPUPM_T_STATES) ||
2690e751525SEric Saxe (mach_state->ms_caps & CPUPM_P_STATES) ||
27078d5422cSMark Haywood (mach_state->ms_caps & CPUPM_C_STATES)) {
27178d5422cSMark Haywood if (first) {
27278d5422cSMark Haywood acpica_write_cpupm_capabilities(
27378d5422cSMark Haywood mach_state->ms_caps & CPUPM_P_STATES,
27478d5422cSMark Haywood mach_state->ms_caps & CPUPM_C_STATES);
27578d5422cSMark Haywood }
27629091f17SAnup Pemmaiah if (mach_state->ms_caps & CPUPM_T_STATES) {
27741333a9eSMark Haywood cpupm_throttle_manage_notification(cp);
27829091f17SAnup Pemmaiah }
27929091f17SAnup Pemmaiah if (mach_state->ms_caps & CPUPM_C_STATES) {
28041333a9eSMark Haywood cpuidle_manage_cstates(cp);
28129091f17SAnup Pemmaiah }
28229091f17SAnup Pemmaiah if (mach_state->ms_caps & CPUPM_P_STATES) {
28341333a9eSMark Haywood cpupm_power_manage_notifications(cp);
28429091f17SAnup Pemmaiah }
28541333a9eSMark Haywood cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp);
28678d5422cSMark Haywood }
28778d5422cSMark Haywood first = B_FALSE;
2880e751525SEric Saxe #endif
2890e751525SEric Saxe }
2900e751525SEric Saxe
2910e751525SEric Saxe /*
292444f66e7SMark Haywood * Free any resources allocated during cpupm initialization or cpupm start.
2930e751525SEric Saxe */
2940e751525SEric Saxe /*ARGSUSED*/
2950e751525SEric Saxe void
cpupm_free(cpu_t * cp,boolean_t cpupm_stop)296444f66e7SMark Haywood cpupm_free(cpu_t *cp, boolean_t cpupm_stop)
2970e751525SEric Saxe {
2980e751525SEric Saxe #ifndef __xpv
2990e751525SEric Saxe cpupm_mach_state_t *mach_state =
3000e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
3010e751525SEric Saxe
3020e751525SEric Saxe if (mach_state == NULL)
3030e751525SEric Saxe return;
304444f66e7SMark Haywood
3050e751525SEric Saxe if (mach_state->ms_pstate.cma_ops != NULL) {
306444f66e7SMark Haywood if (cpupm_stop)
307444f66e7SMark Haywood mach_state->ms_pstate.cma_ops->cpus_stop(cp);
308444f66e7SMark Haywood else
3090e751525SEric Saxe mach_state->ms_pstate.cma_ops->cpus_fini(cp);
3100e751525SEric Saxe mach_state->ms_pstate.cma_ops = NULL;
3110e751525SEric Saxe }
3120e751525SEric Saxe
3130e751525SEric Saxe if (mach_state->ms_tstate.cma_ops != NULL) {
314444f66e7SMark Haywood if (cpupm_stop)
315444f66e7SMark Haywood mach_state->ms_tstate.cma_ops->cpus_stop(cp);
316444f66e7SMark Haywood else
3170e751525SEric Saxe mach_state->ms_tstate.cma_ops->cpus_fini(cp);
3180e751525SEric Saxe mach_state->ms_tstate.cma_ops = NULL;
3190e751525SEric Saxe }
3200e751525SEric Saxe
3210e751525SEric Saxe if (mach_state->ms_cstate.cma_ops != NULL) {
322444f66e7SMark Haywood if (cpupm_stop)
323444f66e7SMark Haywood mach_state->ms_cstate.cma_ops->cpus_stop(cp);
324444f66e7SMark Haywood else
3250e751525SEric Saxe mach_state->ms_cstate.cma_ops->cpus_fini(cp);
326444f66e7SMark Haywood
3270e751525SEric Saxe mach_state->ms_cstate.cma_ops = NULL;
3280e751525SEric Saxe }
3290e751525SEric Saxe
3300e751525SEric Saxe cpupm_free_notify_handlers(cp);
3310e751525SEric Saxe
3320e751525SEric Saxe if (mach_state->ms_acpi_handle != NULL) {
3330e751525SEric Saxe cpu_acpi_fini(mach_state->ms_acpi_handle);
3340e751525SEric Saxe mach_state->ms_acpi_handle = NULL;
3350e751525SEric Saxe }
3360e751525SEric Saxe
3370e751525SEric Saxe mutex_destroy(&mach_state->ms_lock);
3380e751525SEric Saxe kmem_free(mach_state, sizeof (cpupm_mach_state_t));
3390e751525SEric Saxe cp->cpu_m.mcpu_pm_mach_state = NULL;
3400e751525SEric Saxe #endif
3410e751525SEric Saxe }
3420e751525SEric Saxe
343444f66e7SMark Haywood void
cpupm_fini(cpu_t * cp)344444f66e7SMark Haywood cpupm_fini(cpu_t *cp)
345444f66e7SMark Haywood {
3460e751525SEric Saxe /*
347444f66e7SMark Haywood * call (*cpus_fini)() ops to release the cpupm resource
348444f66e7SMark Haywood * in the P/C/T-state driver
349444f66e7SMark Haywood */
350444f66e7SMark Haywood cpupm_free(cp, B_FALSE);
351444f66e7SMark Haywood }
352444f66e7SMark Haywood
353444f66e7SMark Haywood void
cpupm_start(cpu_t * cp)354444f66e7SMark Haywood cpupm_start(cpu_t *cp)
355444f66e7SMark Haywood {
356444f66e7SMark Haywood cpupm_init(cp);
357444f66e7SMark Haywood }
358444f66e7SMark Haywood
359444f66e7SMark Haywood void
cpupm_stop(cpu_t * cp)360444f66e7SMark Haywood cpupm_stop(cpu_t *cp)
361444f66e7SMark Haywood {
362444f66e7SMark Haywood /*
363444f66e7SMark Haywood * call (*cpus_stop)() ops to reclaim the cpupm resource
364444f66e7SMark Haywood * in the P/C/T-state driver
365444f66e7SMark Haywood */
366444f66e7SMark Haywood cpupm_free(cp, B_TRUE);
367444f66e7SMark Haywood }
368444f66e7SMark Haywood
369444f66e7SMark Haywood /*
370444f66e7SMark Haywood * If A CPU has started and at least one power state is manageable,
371444f66e7SMark Haywood * then the CPU is ready for power management.
3720e751525SEric Saxe */
3730e751525SEric Saxe boolean_t
cpupm_is_ready(cpu_t * cp)374444f66e7SMark Haywood cpupm_is_ready(cpu_t *cp)
3750e751525SEric Saxe {
3760e751525SEric Saxe #ifndef __xpv
377444f66e7SMark Haywood cpupm_mach_state_t *mach_state =
378444f66e7SMark Haywood (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
379444f66e7SMark Haywood uint32_t cpupm_caps = mach_state->ms_caps;
380444f66e7SMark Haywood
3810e751525SEric Saxe if (cpupm_enabled == CPUPM_NO_STATES)
3820e751525SEric Saxe return (B_FALSE);
383444f66e7SMark Haywood
384444f66e7SMark Haywood if ((cpupm_caps & CPUPM_T_STATES) ||
385444f66e7SMark Haywood (cpupm_caps & CPUPM_P_STATES) ||
386444f66e7SMark Haywood (cpupm_caps & CPUPM_C_STATES))
387444f66e7SMark Haywood
388444f66e7SMark Haywood return (B_TRUE);
389444f66e7SMark Haywood return (B_FALSE);
3900e751525SEric Saxe #else
391444f66e7SMark Haywood _NOTE(ARGUNUSED(cp));
3920e751525SEric Saxe return (B_FALSE);
3930e751525SEric Saxe #endif
3940e751525SEric Saxe }
3950e751525SEric Saxe
3960e751525SEric Saxe boolean_t
cpupm_is_enabled(uint32_t state)3970e751525SEric Saxe cpupm_is_enabled(uint32_t state)
3980e751525SEric Saxe {
3990e751525SEric Saxe return ((cpupm_enabled & state) == state);
4000e751525SEric Saxe }
4010e751525SEric Saxe
4020e751525SEric Saxe /*
4030e751525SEric Saxe * By default, all states are enabled.
4040e751525SEric Saxe */
4050e751525SEric Saxe void
cpupm_disable(uint32_t state)4060e751525SEric Saxe cpupm_disable(uint32_t state)
4070e751525SEric Saxe {
4080e751525SEric Saxe
4090e751525SEric Saxe if (state & CPUPM_P_STATES) {
4100e751525SEric Saxe cpupm_free_domains(&cpupm_pstate_domains);
4110e751525SEric Saxe }
4120e751525SEric Saxe if (state & CPUPM_T_STATES) {
4130e751525SEric Saxe cpupm_free_domains(&cpupm_tstate_domains);
4140e751525SEric Saxe }
4150e751525SEric Saxe if (state & CPUPM_C_STATES) {
4160e751525SEric Saxe cpupm_free_domains(&cpupm_cstate_domains);
4170e751525SEric Saxe }
4180e751525SEric Saxe cpupm_enabled &= ~state;
4190e751525SEric Saxe }
4200e751525SEric Saxe
4210e751525SEric Saxe /*
4220e751525SEric Saxe * Allocate power domains for C,P and T States
4230e751525SEric Saxe */
4240e751525SEric Saxe void
cpupm_alloc_domains(cpu_t * cp,int state)4250e751525SEric Saxe cpupm_alloc_domains(cpu_t *cp, int state)
4260e751525SEric Saxe {
4270e751525SEric Saxe cpupm_mach_state_t *mach_state =
4280e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
4290e751525SEric Saxe cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
4300e751525SEric Saxe cpupm_state_domains_t **dom_ptr;
4310e751525SEric Saxe cpupm_state_domains_t *dptr;
4320e751525SEric Saxe cpupm_state_domains_t **mach_dom_state_ptr;
4330e751525SEric Saxe uint32_t domain;
4340e751525SEric Saxe uint32_t type;
4350e751525SEric Saxe
4360e751525SEric Saxe switch (state) {
4370e751525SEric Saxe case CPUPM_P_STATES:
4380e751525SEric Saxe if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) {
4390e751525SEric Saxe domain = CPU_ACPI_PSD(handle).sd_domain;
4400e751525SEric Saxe type = CPU_ACPI_PSD(handle).sd_type;
4410e751525SEric Saxe } else {
442*a3114836SGerry Liu if (MUTEX_HELD(&cpu_lock)) {
443*a3114836SGerry Liu domain = cpuid_get_chipid(cp);
444*a3114836SGerry Liu } else {
4450e751525SEric Saxe mutex_enter(&cpu_lock);
4460e751525SEric Saxe domain = cpuid_get_chipid(cp);
4470e751525SEric Saxe mutex_exit(&cpu_lock);
448*a3114836SGerry Liu }
4490e751525SEric Saxe type = CPU_ACPI_HW_ALL;
4500e751525SEric Saxe }
4510e751525SEric Saxe dom_ptr = &cpupm_pstate_domains;
4520e751525SEric Saxe mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain;
4530e751525SEric Saxe break;
4540e751525SEric Saxe case CPUPM_T_STATES:
4550e751525SEric Saxe if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) {
4560e751525SEric Saxe domain = CPU_ACPI_TSD(handle).sd_domain;
4570e751525SEric Saxe type = CPU_ACPI_TSD(handle).sd_type;
4580e751525SEric Saxe } else {
459*a3114836SGerry Liu if (MUTEX_HELD(&cpu_lock)) {
460*a3114836SGerry Liu domain = cpuid_get_chipid(cp);
461*a3114836SGerry Liu } else {
4620e751525SEric Saxe mutex_enter(&cpu_lock);
4630e751525SEric Saxe domain = cpuid_get_chipid(cp);
4640e751525SEric Saxe mutex_exit(&cpu_lock);
465*a3114836SGerry Liu }
4660e751525SEric Saxe type = CPU_ACPI_HW_ALL;
4670e751525SEric Saxe }
4680e751525SEric Saxe dom_ptr = &cpupm_tstate_domains;
4690e751525SEric Saxe mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain;
4700e751525SEric Saxe break;
4710e751525SEric Saxe case CPUPM_C_STATES:
4720e751525SEric Saxe if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) {
4730e751525SEric Saxe domain = CPU_ACPI_CSD(handle).sd_domain;
4740e751525SEric Saxe type = CPU_ACPI_CSD(handle).sd_type;
4750e751525SEric Saxe } else {
476*a3114836SGerry Liu if (MUTEX_HELD(&cpu_lock)) {
477*a3114836SGerry Liu domain = cpuid_get_coreid(cp);
478*a3114836SGerry Liu } else {
4790e751525SEric Saxe mutex_enter(&cpu_lock);
4800e751525SEric Saxe domain = cpuid_get_coreid(cp);
4810e751525SEric Saxe mutex_exit(&cpu_lock);
482*a3114836SGerry Liu }
4830e751525SEric Saxe type = CPU_ACPI_HW_ALL;
4840e751525SEric Saxe }
4850e751525SEric Saxe dom_ptr = &cpupm_cstate_domains;
4860e751525SEric Saxe mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain;
4870e751525SEric Saxe break;
4880e751525SEric Saxe default:
4890e751525SEric Saxe return;
4900e751525SEric Saxe }
4910e751525SEric Saxe
4920e751525SEric Saxe for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
4930e751525SEric Saxe if (dptr->pm_domain == domain)
4940e751525SEric Saxe break;
4950e751525SEric Saxe }
4960e751525SEric Saxe
4970e751525SEric Saxe /* new domain is created and linked at the head */
4980e751525SEric Saxe if (dptr == NULL) {
4990e751525SEric Saxe dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP);
5000e751525SEric Saxe dptr->pm_domain = domain;
5010e751525SEric Saxe dptr->pm_type = type;
5020e751525SEric Saxe dptr->pm_next = *dom_ptr;
5030e751525SEric Saxe mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN,
5040e751525SEric Saxe (void *)ipltospl(DISP_LEVEL));
5050e751525SEric Saxe CPUSET_ZERO(dptr->pm_cpus);
5060e751525SEric Saxe *dom_ptr = dptr;
5070e751525SEric Saxe }
5080e751525SEric Saxe CPUSET_ADD(dptr->pm_cpus, cp->cpu_id);
5090e751525SEric Saxe *mach_dom_state_ptr = dptr;
5100e751525SEric Saxe }
5110e751525SEric Saxe
5120e751525SEric Saxe /*
5130e751525SEric Saxe * Free C, P or T state power domains
5140e751525SEric Saxe */
5150e751525SEric Saxe void
cpupm_free_domains(cpupm_state_domains_t ** dom_ptr)5160e751525SEric Saxe cpupm_free_domains(cpupm_state_domains_t **dom_ptr)
5170e751525SEric Saxe {
5180e751525SEric Saxe cpupm_state_domains_t *this_domain, *next_domain;
5190e751525SEric Saxe
5200e751525SEric Saxe this_domain = *dom_ptr;
5210e751525SEric Saxe while (this_domain != NULL) {
5220e751525SEric Saxe next_domain = this_domain->pm_next;
5230e751525SEric Saxe mutex_destroy(&this_domain->pm_lock);
5240e751525SEric Saxe kmem_free((void *)this_domain,
5250e751525SEric Saxe sizeof (cpupm_state_domains_t));
5260e751525SEric Saxe this_domain = next_domain;
5270e751525SEric Saxe }
5280e751525SEric Saxe *dom_ptr = NULL;
5290e751525SEric Saxe }
5300e751525SEric Saxe
531444f66e7SMark Haywood /*
532444f66e7SMark Haywood * Remove CPU from C, P or T state power domains
533444f66e7SMark Haywood */
534444f66e7SMark Haywood void
cpupm_remove_domains(cpu_t * cp,int state,cpupm_state_domains_t ** dom_ptr)535444f66e7SMark Haywood cpupm_remove_domains(cpu_t *cp, int state, cpupm_state_domains_t **dom_ptr)
536444f66e7SMark Haywood {
537444f66e7SMark Haywood cpupm_mach_state_t *mach_state =
538444f66e7SMark Haywood (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
539444f66e7SMark Haywood cpupm_state_domains_t *dptr;
540444f66e7SMark Haywood uint32_t pm_domain;
541444f66e7SMark Haywood
542444f66e7SMark Haywood ASSERT(mach_state);
543444f66e7SMark Haywood
544444f66e7SMark Haywood switch (state) {
545444f66e7SMark Haywood case CPUPM_P_STATES:
546444f66e7SMark Haywood pm_domain = mach_state->ms_pstate.cma_domain->pm_domain;
547444f66e7SMark Haywood break;
548444f66e7SMark Haywood case CPUPM_T_STATES:
549444f66e7SMark Haywood pm_domain = mach_state->ms_tstate.cma_domain->pm_domain;
550444f66e7SMark Haywood break;
551444f66e7SMark Haywood case CPUPM_C_STATES:
552444f66e7SMark Haywood pm_domain = mach_state->ms_cstate.cma_domain->pm_domain;
553444f66e7SMark Haywood break;
554444f66e7SMark Haywood default:
555444f66e7SMark Haywood return;
556444f66e7SMark Haywood }
557444f66e7SMark Haywood
558444f66e7SMark Haywood /*
559444f66e7SMark Haywood * Find the CPU C, P or T state power domain
560444f66e7SMark Haywood */
561444f66e7SMark Haywood for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
562444f66e7SMark Haywood if (dptr->pm_domain == pm_domain)
563444f66e7SMark Haywood break;
564444f66e7SMark Haywood }
565444f66e7SMark Haywood
566444f66e7SMark Haywood /*
567444f66e7SMark Haywood * return if no matched domain found
568444f66e7SMark Haywood */
569444f66e7SMark Haywood if (dptr == NULL)
570444f66e7SMark Haywood return;
571444f66e7SMark Haywood
572444f66e7SMark Haywood /*
573444f66e7SMark Haywood * We found one matched power domain, remove CPU from its cpuset.
5746af9d452Saubrey.li@intel.com * pm_lock(spin lock) here to avoid the race conditions between
575444f66e7SMark Haywood * event change notification and cpu remove.
576444f66e7SMark Haywood */
577444f66e7SMark Haywood mutex_enter(&dptr->pm_lock);
578444f66e7SMark Haywood if (CPU_IN_SET(dptr->pm_cpus, cp->cpu_id))
579444f66e7SMark Haywood CPUSET_DEL(dptr->pm_cpus, cp->cpu_id);
580444f66e7SMark Haywood mutex_exit(&dptr->pm_lock);
581444f66e7SMark Haywood }
582444f66e7SMark Haywood
5830e751525SEric Saxe void
cpupm_alloc_ms_cstate(cpu_t * cp)5840e751525SEric Saxe cpupm_alloc_ms_cstate(cpu_t *cp)
5850e751525SEric Saxe {
5860e751525SEric Saxe cpupm_mach_state_t *mach_state;
5870e751525SEric Saxe cpupm_mach_acpi_state_t *ms_cstate;
5880e751525SEric Saxe
5890e751525SEric Saxe mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
5900e751525SEric Saxe ms_cstate = &mach_state->ms_cstate;
5910e751525SEric Saxe ASSERT(ms_cstate->cma_state.cstate == NULL);
5920e751525SEric Saxe ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t),
5930e751525SEric Saxe KM_SLEEP);
5940e751525SEric Saxe ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1;
5950e751525SEric Saxe }
5960e751525SEric Saxe
5970e751525SEric Saxe void
cpupm_free_ms_cstate(cpu_t * cp)5980e751525SEric Saxe cpupm_free_ms_cstate(cpu_t *cp)
5990e751525SEric Saxe {
6000e751525SEric Saxe cpupm_mach_state_t *mach_state =
6010e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
6020e751525SEric Saxe cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate;
6030e751525SEric Saxe
6040e751525SEric Saxe if (ms_cstate->cma_state.cstate != NULL) {
6050e751525SEric Saxe kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t));
6060e751525SEric Saxe ms_cstate->cma_state.cstate = NULL;
6070e751525SEric Saxe }
6080e751525SEric Saxe }
6090e751525SEric Saxe
6100e751525SEric Saxe void
cpupm_state_change(cpu_t * cp,int level,int state)6110e751525SEric Saxe cpupm_state_change(cpu_t *cp, int level, int state)
6120e751525SEric Saxe {
6130e751525SEric Saxe cpupm_mach_state_t *mach_state =
6140e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
6150e751525SEric Saxe cpupm_state_ops_t *state_ops;
6160e751525SEric Saxe cpupm_state_domains_t *state_domain;
6170e751525SEric Saxe cpuset_t set;
6180e751525SEric Saxe
6190e751525SEric Saxe DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level);
6200e751525SEric Saxe
6210e751525SEric Saxe if (mach_state == NULL) {
6220e751525SEric Saxe return;
6230e751525SEric Saxe }
6240e751525SEric Saxe
6250e751525SEric Saxe switch (state) {
6260e751525SEric Saxe case CPUPM_P_STATES:
6270e751525SEric Saxe state_ops = mach_state->ms_pstate.cma_ops;
6280e751525SEric Saxe state_domain = mach_state->ms_pstate.cma_domain;
6290e751525SEric Saxe break;
6300e751525SEric Saxe case CPUPM_T_STATES:
6310e751525SEric Saxe state_ops = mach_state->ms_tstate.cma_ops;
6320e751525SEric Saxe state_domain = mach_state->ms_tstate.cma_domain;
6330e751525SEric Saxe break;
6340e751525SEric Saxe default:
6350e751525SEric Saxe break;
6360e751525SEric Saxe }
6370e751525SEric Saxe
6380e751525SEric Saxe switch (state_domain->pm_type) {
6390e751525SEric Saxe case CPU_ACPI_SW_ANY:
6400e751525SEric Saxe /*
6410e751525SEric Saxe * A request on any CPU in the domain transitions the domain
6420e751525SEric Saxe */
6430e751525SEric Saxe CPUSET_ONLY(set, cp->cpu_id);
6440e751525SEric Saxe state_ops->cpus_change(set, level);
6450e751525SEric Saxe break;
6460e751525SEric Saxe case CPU_ACPI_SW_ALL:
6470e751525SEric Saxe /*
6480e751525SEric Saxe * All CPUs in the domain must request the transition
6490e751525SEric Saxe */
6500e751525SEric Saxe case CPU_ACPI_HW_ALL:
6510e751525SEric Saxe /*
6520e751525SEric Saxe * P/T-state transitions are coordinated by the hardware
6530e751525SEric Saxe * For now, request the transition on all CPUs in the domain,
6540e751525SEric Saxe * but looking ahead we can probably be smarter about this.
6550e751525SEric Saxe */
6560e751525SEric Saxe mutex_enter(&state_domain->pm_lock);
6570e751525SEric Saxe state_ops->cpus_change(state_domain->pm_cpus, level);
6580e751525SEric Saxe mutex_exit(&state_domain->pm_lock);
6590e751525SEric Saxe break;
6600e751525SEric Saxe default:
66100f97612SMark Haywood cmn_err(CE_NOTE, "Unknown domain coordination type: %d",
6620e751525SEric Saxe state_domain->pm_type);
6630e751525SEric Saxe }
6640e751525SEric Saxe }
6650e751525SEric Saxe
6660e751525SEric Saxe /*
6670e751525SEric Saxe * CPU PM interfaces exposed to the CPU power manager
6680e751525SEric Saxe */
6690e751525SEric Saxe /*ARGSUSED*/
6700e751525SEric Saxe id_t
cpupm_plat_domain_id(cpu_t * cp,cpupm_dtype_t type)6710e751525SEric Saxe cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type)
6720e751525SEric Saxe {
6730e751525SEric Saxe cpupm_mach_state_t *mach_state =
6740e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
6750e751525SEric Saxe
6760e751525SEric Saxe if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) &&
6770e751525SEric Saxe !cpupm_is_enabled(CPUPM_C_STATES))) {
6780e751525SEric Saxe return (CPUPM_NO_DOMAIN);
6790e751525SEric Saxe }
6800e751525SEric Saxe if (type == CPUPM_DTYPE_ACTIVE) {
6810e751525SEric Saxe /*
6820e751525SEric Saxe * Return P-State domain for the specified CPU
6830e751525SEric Saxe */
6840e751525SEric Saxe if (mach_state->ms_pstate.cma_domain) {
6850e751525SEric Saxe return (mach_state->ms_pstate.cma_domain->pm_domain);
6860e751525SEric Saxe }
6870e751525SEric Saxe } else if (type == CPUPM_DTYPE_IDLE) {
6880e751525SEric Saxe /*
6890e751525SEric Saxe * Return C-State domain for the specified CPU
6900e751525SEric Saxe */
6910e751525SEric Saxe if (mach_state->ms_cstate.cma_domain) {
6920e751525SEric Saxe return (mach_state->ms_cstate.cma_domain->pm_domain);
6930e751525SEric Saxe }
6940e751525SEric Saxe }
6950e751525SEric Saxe return (CPUPM_NO_DOMAIN);
6960e751525SEric Saxe }
6970e751525SEric Saxe
6980e751525SEric Saxe /*ARGSUSED*/
6990e751525SEric Saxe uint_t
cpupm_plat_state_enumerate(cpu_t * cp,cpupm_dtype_t type,cpupm_state_t * states)7000e751525SEric Saxe cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type,
7010e751525SEric Saxe cpupm_state_t *states)
7020e751525SEric Saxe {
7030e751525SEric Saxe int *speeds;
7040e751525SEric Saxe uint_t nspeeds, i;
7050e751525SEric Saxe
7060e751525SEric Saxe /*
7070e751525SEric Saxe * Idle domain support unimplemented
7080e751525SEric Saxe */
7090e751525SEric Saxe if (type != CPUPM_DTYPE_ACTIVE) {
7100e751525SEric Saxe return (0);
7110e751525SEric Saxe }
7120e751525SEric Saxe nspeeds = cpupm_get_speeds(cp, &speeds);
7130e751525SEric Saxe
7140e751525SEric Saxe /*
7150e751525SEric Saxe * If the caller passes NULL for states, just return the
7160e751525SEric Saxe * number of states.
7170e751525SEric Saxe */
7180e751525SEric Saxe if (states != NULL) {
7190e751525SEric Saxe for (i = 0; i < nspeeds; i++) {
7200e751525SEric Saxe states[i].cps_speed = speeds[i];
7210e751525SEric Saxe states[i].cps_handle = (cpupm_handle_t)i;
7220e751525SEric Saxe }
7230e751525SEric Saxe }
7240e751525SEric Saxe cpupm_free_speeds(speeds, nspeeds);
7250e751525SEric Saxe return (nspeeds);
7260e751525SEric Saxe }
7270e751525SEric Saxe
7280e751525SEric Saxe /*ARGSUSED*/
7290e751525SEric Saxe int
cpupm_plat_change_state(cpu_t * cp,cpupm_state_t * state)7300e751525SEric Saxe cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state)
7310e751525SEric Saxe {
732444f66e7SMark Haywood if (!cpupm_is_ready(cp))
7330e751525SEric Saxe return (-1);
7340e751525SEric Saxe
7350e751525SEric Saxe cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES);
7360e751525SEric Saxe
7370e751525SEric Saxe return (0);
7380e751525SEric Saxe }
7390e751525SEric Saxe
/*ARGSUSED*/
/*
 * Retrieve the list of supported speeds for a CPU from ACPI data,
 * storing the allocated array through *speeds and returning the
 * number of entries (0 under the xVM hypervisor, where ACPI P-state
 * data is not consulted).
 *
 * Note: It is the responsibility of the users of
 * cpupm_get_speeds() to free the memory allocated
 * for speeds using cpupm_free_speeds()
 */
uint_t
cpupm_get_speeds(cpu_t *cp, int **speeds)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds));
#else
	/* No ACPI access when running paravirtualized */
	return (0);
#endif
}
7570e751525SEric Saxe
/*ARGSUSED*/
/*
 * Release a speeds array previously allocated by cpupm_get_speeds().
 * No-op under the xVM hypervisor, matching cpupm_get_speeds(), which
 * allocates nothing there.
 */
void
cpupm_free_speeds(int *speeds, uint_t nspeeds)
{
#ifndef __xpv
	cpu_acpi_free_speeds(speeds, nspeeds);
#endif
}
7660e751525SEric Saxe
7670e751525SEric Saxe /*
7680e751525SEric Saxe * All CPU instances have been initialized successfully.
7690e751525SEric Saxe */
7700e751525SEric Saxe boolean_t
cpupm_power_ready(cpu_t * cp)771444f66e7SMark Haywood cpupm_power_ready(cpu_t *cp)
7720e751525SEric Saxe {
773444f66e7SMark Haywood return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready(cp));
7740e751525SEric Saxe }
7750e751525SEric Saxe
7760e751525SEric Saxe /*
7770e751525SEric Saxe * All CPU instances have been initialized successfully.
7780e751525SEric Saxe */
7790e751525SEric Saxe boolean_t
cpupm_throttle_ready(cpu_t * cp)780444f66e7SMark Haywood cpupm_throttle_ready(cpu_t *cp)
7810e751525SEric Saxe {
782444f66e7SMark Haywood return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready(cp));
7830e751525SEric Saxe }
7840e751525SEric Saxe
7850e751525SEric Saxe /*
7860e751525SEric Saxe * All CPU instances have been initialized successfully.
7870e751525SEric Saxe */
7880e751525SEric Saxe boolean_t
cpupm_cstate_ready(cpu_t * cp)789444f66e7SMark Haywood cpupm_cstate_ready(cpu_t *cp)
7900e751525SEric Saxe {
791444f66e7SMark Haywood return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready(cp));
7920e751525SEric Saxe }
7930e751525SEric Saxe
7940e751525SEric Saxe void
cpupm_notify_handler(ACPI_HANDLE obj,UINT32 val,void * ctx)7950e751525SEric Saxe cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
7960e751525SEric Saxe {
7970e751525SEric Saxe cpu_t *cp = ctx;
7980e751525SEric Saxe cpupm_mach_state_t *mach_state =
7990e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
8000e751525SEric Saxe cpupm_notification_t *entry;
8010e751525SEric Saxe
8020e751525SEric Saxe mutex_enter(&mach_state->ms_lock);
8030e751525SEric Saxe for (entry = mach_state->ms_handlers; entry != NULL;
8040e751525SEric Saxe entry = entry->nq_next) {
8050e751525SEric Saxe entry->nq_handler(obj, val, entry->nq_ctx);
8060e751525SEric Saxe }
8070e751525SEric Saxe mutex_exit(&mach_state->ms_lock);
8080e751525SEric Saxe }
8090e751525SEric Saxe
8100e751525SEric Saxe /*ARGSUSED*/
8110e751525SEric Saxe void
cpupm_add_notify_handler(cpu_t * cp,CPUPM_NOTIFY_HANDLER handler,void * ctx)8120e751525SEric Saxe cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx)
8130e751525SEric Saxe {
8140e751525SEric Saxe #ifndef __xpv
8150e751525SEric Saxe cpupm_mach_state_t *mach_state =
8160e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
8170e751525SEric Saxe cpupm_notification_t *entry;
8180e751525SEric Saxe
8190e751525SEric Saxe entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP);
8200e751525SEric Saxe entry->nq_handler = handler;
8210e751525SEric Saxe entry->nq_ctx = ctx;
8220e751525SEric Saxe mutex_enter(&mach_state->ms_lock);
8230e751525SEric Saxe if (mach_state->ms_handlers == NULL) {
8240e751525SEric Saxe entry->nq_next = NULL;
8250e751525SEric Saxe mach_state->ms_handlers = entry;
8260e751525SEric Saxe cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle,
8270e751525SEric Saxe cpupm_notify_handler, cp);
8280e751525SEric Saxe
8290e751525SEric Saxe } else {
8300e751525SEric Saxe entry->nq_next = mach_state->ms_handlers;
8310e751525SEric Saxe mach_state->ms_handlers = entry;
8320e751525SEric Saxe }
8330e751525SEric Saxe mutex_exit(&mach_state->ms_lock);
8340e751525SEric Saxe #endif
8350e751525SEric Saxe }
8360e751525SEric Saxe
8370e751525SEric Saxe /*ARGSUSED*/
8380e751525SEric Saxe static void
cpupm_free_notify_handlers(cpu_t * cp)8390e751525SEric Saxe cpupm_free_notify_handlers(cpu_t *cp)
8400e751525SEric Saxe {
8410e751525SEric Saxe #ifndef __xpv
8420e751525SEric Saxe cpupm_mach_state_t *mach_state =
8430e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
8440e751525SEric Saxe cpupm_notification_t *entry;
8450e751525SEric Saxe cpupm_notification_t *next;
8460e751525SEric Saxe
8470e751525SEric Saxe mutex_enter(&mach_state->ms_lock);
8480e751525SEric Saxe if (mach_state->ms_handlers == NULL) {
8490e751525SEric Saxe mutex_exit(&mach_state->ms_lock);
8500e751525SEric Saxe return;
8510e751525SEric Saxe }
8520e751525SEric Saxe if (mach_state->ms_acpi_handle != NULL) {
8530e751525SEric Saxe cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle,
8540e751525SEric Saxe cpupm_notify_handler);
8550e751525SEric Saxe }
8560e751525SEric Saxe entry = mach_state->ms_handlers;
8570e751525SEric Saxe while (entry != NULL) {
8580e751525SEric Saxe next = entry->nq_next;
8590e751525SEric Saxe kmem_free(entry, sizeof (cpupm_notification_t));
8600e751525SEric Saxe entry = next;
8610e751525SEric Saxe }
8620e751525SEric Saxe mach_state->ms_handlers = NULL;
8630e751525SEric Saxe mutex_exit(&mach_state->ms_lock);
8640e751525SEric Saxe #endif
8650e751525SEric Saxe }
8660e751525SEric Saxe
8670e751525SEric Saxe /*
8680e751525SEric Saxe * Get the current max speed from the ACPI _PPC object
8690e751525SEric Saxe */
8700e751525SEric Saxe /*ARGSUSED*/
8710e751525SEric Saxe int
cpupm_get_top_speed(cpu_t * cp)8720e751525SEric Saxe cpupm_get_top_speed(cpu_t *cp)
8730e751525SEric Saxe {
8740e751525SEric Saxe #ifndef __xpv
8750e751525SEric Saxe cpupm_mach_state_t *mach_state;
8760e751525SEric Saxe cpu_acpi_handle_t handle;
8770e751525SEric Saxe int plat_level;
8780e751525SEric Saxe uint_t nspeeds;
8790e751525SEric Saxe int max_level;
8800e751525SEric Saxe
8810e751525SEric Saxe mach_state =
8820e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
8830e751525SEric Saxe handle = mach_state->ms_acpi_handle;
8840e751525SEric Saxe
8850e751525SEric Saxe cpu_acpi_cache_ppc(handle);
8860e751525SEric Saxe plat_level = CPU_ACPI_PPC(handle);
8870e751525SEric Saxe
8880e751525SEric Saxe nspeeds = CPU_ACPI_PSTATES_COUNT(handle);
8890e751525SEric Saxe
8900e751525SEric Saxe max_level = nspeeds - 1;
8910e751525SEric Saxe if ((plat_level < 0) || (plat_level > max_level)) {
8920e751525SEric Saxe cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: "
8930e751525SEric Saxe "_PPC out of range %d", cp->cpu_id, plat_level);
8940e751525SEric Saxe plat_level = 0;
8950e751525SEric Saxe }
8960e751525SEric Saxe
8970e751525SEric Saxe return (plat_level);
8980e751525SEric Saxe #else
8990e751525SEric Saxe return (0);
9000e751525SEric Saxe #endif
9010e751525SEric Saxe }
9020e751525SEric Saxe
9030e751525SEric Saxe /*
9040e751525SEric Saxe * This notification handler is called whenever the ACPI _PPC
9050e751525SEric Saxe * object changes. The _PPC is a sort of governor on power levels.
9060e751525SEric Saxe * It sets an upper threshold on which, _PSS defined, power levels
9070e751525SEric Saxe * are usuable. The _PPC value is dynamic and may change as properties
9080e751525SEric Saxe * (i.e., thermal or AC source) of the system change.
9090e751525SEric Saxe */
9100e751525SEric Saxe
9110e751525SEric Saxe static void
cpupm_power_manage_notifications(void * ctx)9120e751525SEric Saxe cpupm_power_manage_notifications(void *ctx)
9130e751525SEric Saxe {
9140e751525SEric Saxe cpu_t *cp = ctx;
9150e751525SEric Saxe int top_speed;
9160e751525SEric Saxe
9170e751525SEric Saxe top_speed = cpupm_get_top_speed(cp);
9180e751525SEric Saxe cpupm_redefine_max_activepwr_state(cp, top_speed);
9190e751525SEric Saxe }
9200e751525SEric Saxe
9210e751525SEric Saxe /* ARGSUSED */
9220e751525SEric Saxe static void
cpupm_event_notify_handler(ACPI_HANDLE obj,UINT32 val,void * ctx)9230e751525SEric Saxe cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
9240e751525SEric Saxe {
9250e751525SEric Saxe #ifndef __xpv
926d218c8f0SMark Haywood
927d218c8f0SMark Haywood cpu_t *cp = ctx;
928d218c8f0SMark Haywood cpupm_mach_state_t *mach_state =
929d218c8f0SMark Haywood (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
930d218c8f0SMark Haywood
931d218c8f0SMark Haywood if (mach_state == NULL)
932d218c8f0SMark Haywood return;
933d218c8f0SMark Haywood
9340e751525SEric Saxe /*
9350e751525SEric Saxe * Currently, we handle _TPC,_CST and _PPC change notifications.
9360e751525SEric Saxe */
937d218c8f0SMark Haywood if (val == CPUPM_TPC_CHANGE_NOTIFICATION &&
938d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_T_STATES) {
9390e751525SEric Saxe cpupm_throttle_manage_notification(ctx);
940d218c8f0SMark Haywood } else if (val == CPUPM_CST_CHANGE_NOTIFICATION &&
941d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_C_STATES) {
9420e751525SEric Saxe cpuidle_manage_cstates(ctx);
943d218c8f0SMark Haywood } else if (val == CPUPM_PPC_CHANGE_NOTIFICATION &&
944d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_P_STATES) {
9450e751525SEric Saxe cpupm_power_manage_notifications(ctx);
9460e751525SEric Saxe }
9470e751525SEric Saxe #endif
9480e751525SEric Saxe }
9490e751525SEric Saxe
/*
 * Update cpupm cstate data each time CPU exits idle: record the
 * wakeup timestamp so cpupm_next_cstate() can account the idle
 * interval on the next idle entry.
 */
void
cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end)
{
	cs_data->cs_idle_exit = end;
}
9580e751525SEric Saxe
/*
 * Determine next cstate based on cpupm data.
 * Update cpupm cstate data each time CPU goes idle.
 * Do as much as possible in the idle state bookkeeping function because the
 * performance impact while idle is minimal compared to in the wakeup function
 * when there is real work to do.
 *
 * Returns cs_data->cs_next_cstate, the index into cstates of the
 * C-state this CPU should enter.  The index is recomputed only when
 * the current sampling window (cpupm_cs_sample_interval) has elapsed;
 * otherwise the previously chosen value is returned unchanged.
 */
uint32_t
cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates,
    uint32_t cs_count, hrtime_t start)
{
	hrtime_t duration;
	hrtime_t ave_interval;
	hrtime_t ave_idle_time;
	uint32_t i, smpl_cnt;

	/*
	 * Accumulate the idle time of the interval that just ended
	 * (cs_idle_exit was stamped by cpupm_wakeup_cstate_data())
	 * and mark the start of the new idle period.
	 */
	duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter;
	scalehrtime(&duration);
	cs_data->cs_idle += duration;
	cs_data->cs_idle_enter = start;

	smpl_cnt = ++cs_data->cs_cnt;
	cs_data->cs_smpl_len = start - cs_data->cs_smpl_start;
	scalehrtime(&cs_data->cs_smpl_len);
	if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) {
		/*
		 * Sampling window expired: snapshot the idle statistics,
		 * compute the idle percentage for the window, and reset
		 * the accumulators for the next window.
		 */
		cs_data->cs_smpl_idle = cs_data->cs_idle;
		cs_data->cs_idle = 0;
		cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) /
		    cs_data->cs_smpl_len);

		cs_data->cs_smpl_start = start;
		cs_data->cs_cnt = 0;

		/*
		 * Strand level C-state policy
		 * The cpu_acpi_cstate_t *cstates array is not required to
		 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3.
		 * There are cs_count entries in the cstates array.
		 * cs_data->cs_next_cstate contains the index of the next
		 * C-state this CPU should enter.
		 *
		 * Each heuristic below may lower cs_count, trimming the
		 * deepest states from consideration; the final choice is
		 * the deepest state that survives all three checks.
		 */
		ASSERT(cstates[0].cs_type == CPU_ACPI_C1);

		/*
		 * Will CPU be idle long enough to save power?
		 * (average idle time in us vs. tunable-scaled wakeup
		 * latency of each candidate state)
		 */
		ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_idle_time < (cstates[i].cs_latency *
			    cpupm_cs_idle_save_tunable)) {
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, i);
			}
		}

		/*
		 * Wakeup often (even when non-idle time is very short)?
		 * Some producer/consumer type loads fall into this category.
		 * (average inter-idle interval in us vs. tunable-scaled
		 * latency: frequent wakeups make deep states too costly)
		 */
		ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_interval <= (cstates[i].cs_latency *
			    cpupm_cs_idle_cost_tunable)) {
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, (CPU_MAX_CSTATES + i));
			}
		}

		/*
		 * Idle percent
		 * Require a minimum idle percentage (per-state tunable)
		 * before permitting C2/C3 depth.
		 */
		for (i = 1; i < cs_count; ++i) {
			switch (cstates[i].cs_type) {
			case CPU_ACPI_C2:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C2_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;

			case CPU_ACPI_C3:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C3_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;
			}
		}

		/* Deepest state that survived all heuristics */
		cs_data->cs_next_cstate = cs_count - 1;
	}

	return (cs_data->cs_next_cstate);
}
1061