/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All Rights Reserved.
 */
290e751525SEric Saxe
/*
 * CPU power management driver support for i86pc.
 */
330e751525SEric Saxe
340e751525SEric Saxe #include <sys/ddi.h>
350e751525SEric Saxe #include <sys/sunddi.h>
360e751525SEric Saxe #include <sys/cpupm.h>
370e751525SEric Saxe #include <sys/cpudrv_mach.h>
380e751525SEric Saxe #include <sys/machsystm.h>
390e751525SEric Saxe #include <sys/cpu_pm.h>
400e751525SEric Saxe #include <sys/cpuvar.h>
410e751525SEric Saxe #include <sys/sdt.h>
420e751525SEric Saxe #include <sys/cpu_idle.h>
430e751525SEric Saxe
/*
 * Note that our driver numbers the power levels from lowest to
 * highest starting at 1 (i.e., the lowest power level is 1 and
 * the highest power level is cpupm->num_spd). The x86 modules get
 * their power levels from ACPI which numbers power levels from
 * highest to lowest starting at 0 (i.e., the lowest power level
 * is (cpupm->num_spd - 1) and the highest power level is 0). So to
 * map one of our driver power levels to one understood by ACPI we
 * simply subtract our driver power level from cpupm->num_spd. Likewise,
 * to map an ACPI power level to the proper driver power level, we
 * subtract the ACPI power level from cpupm->num_spd.
 *
 * Both arguments are fully parenthesized so that compound expressions
 * (e.g. "&state" or "level - 1") expand with the intended precedence.
 */
#define	PM_2_PLAT_LEVEL(cpupm, pm_level)	\
	((cpupm)->num_spd - (pm_level))
#define	PLAT_2_PM_LEVEL(cpupm, plat_level)	\
	((cpupm)->num_spd - (plat_level))
580e751525SEric Saxe
590e751525SEric Saxe /*
600e751525SEric Saxe * Change CPU speed using interface provided by module.
610e751525SEric Saxe */
620e751525SEric Saxe int
cpudrv_change_speed(cpudrv_devstate_t * cpudsp,cpudrv_pm_spd_t * new_spd)630e751525SEric Saxe cpudrv_change_speed(cpudrv_devstate_t *cpudsp, cpudrv_pm_spd_t *new_spd)
640e751525SEric Saxe {
650e751525SEric Saxe cpu_t *cp = cpudsp->cp;
660e751525SEric Saxe cpupm_mach_state_t *mach_state =
670e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
680e751525SEric Saxe cpudrv_pm_t *cpupm;
690e751525SEric Saxe cpuset_t set;
700e751525SEric Saxe uint32_t plat_level;
710e751525SEric Saxe
720e751525SEric Saxe if (!(mach_state->ms_caps & CPUPM_P_STATES))
730e751525SEric Saxe return (DDI_FAILURE);
740e751525SEric Saxe ASSERT(mach_state->ms_pstate.cma_ops != NULL);
750e751525SEric Saxe cpupm = &(cpudsp->cpudrv_pm);
760e751525SEric Saxe plat_level = PM_2_PLAT_LEVEL(cpupm, new_spd->pm_level);
770e751525SEric Saxe CPUSET_ONLY(set, cp->cpu_id);
780e751525SEric Saxe mach_state->ms_pstate.cma_ops->cpus_change(set, plat_level);
790e751525SEric Saxe
800e751525SEric Saxe return (DDI_SUCCESS);
810e751525SEric Saxe }
820e751525SEric Saxe
830e751525SEric Saxe /*
840e751525SEric Saxe * Determine the cpu_id for the CPU device.
850e751525SEric Saxe */
860e751525SEric Saxe boolean_t
cpudrv_get_cpu_id(dev_info_t * dip,processorid_t * cpu_id)870e751525SEric Saxe cpudrv_get_cpu_id(dev_info_t *dip, processorid_t *cpu_id)
880e751525SEric Saxe {
890e751525SEric Saxe return ((*cpu_id = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
900e751525SEric Saxe DDI_PROP_DONTPASS, "reg", -1)) != -1);
910e751525SEric Saxe
920e751525SEric Saxe }
930e751525SEric Saxe
940e751525SEric Saxe boolean_t
cpudrv_is_enabled(cpudrv_devstate_t * cpudsp)950e751525SEric Saxe cpudrv_is_enabled(cpudrv_devstate_t *cpudsp)
960e751525SEric Saxe {
970e751525SEric Saxe cpupm_mach_state_t *mach_state;
980e751525SEric Saxe
990e751525SEric Saxe if (!cpupm_is_enabled(CPUPM_P_STATES) || !cpudrv_enabled)
1000e751525SEric Saxe return (B_FALSE);
1010e751525SEric Saxe
1020e751525SEric Saxe /*
1030e751525SEric Saxe * Only check the instance specific setting it exists.
1040e751525SEric Saxe */
1050e751525SEric Saxe if (cpudsp != NULL && cpudsp->cp != NULL &&
1060e751525SEric Saxe cpudsp->cp->cpu_m.mcpu_pm_mach_state != NULL) {
1070e751525SEric Saxe mach_state =
1080e751525SEric Saxe (cpupm_mach_state_t *)cpudsp->cp->cpu_m.mcpu_pm_mach_state;
1090e751525SEric Saxe return (mach_state->ms_caps & CPUPM_P_STATES);
1100e751525SEric Saxe }
1110e751525SEric Saxe
1120e751525SEric Saxe return (B_TRUE);
1130e751525SEric Saxe }
1140e751525SEric Saxe
1150e751525SEric Saxe /*
1160e751525SEric Saxe * Is the current thread the thread that is handling the
1170e751525SEric Saxe * PPC change notification?
1180e751525SEric Saxe */
1190e751525SEric Saxe boolean_t
cpudrv_is_governor_thread(cpudrv_pm_t * cpupm)1200e751525SEric Saxe cpudrv_is_governor_thread(cpudrv_pm_t *cpupm)
1210e751525SEric Saxe {
1220e751525SEric Saxe return (curthread == cpupm->pm_governor_thread);
1230e751525SEric Saxe }
1240e751525SEric Saxe
1250e751525SEric Saxe /*
1260e751525SEric Saxe * This routine changes the top speed to which the CPUs can transition by:
1270e751525SEric Saxe *
1280e751525SEric Saxe * - Resetting the up_spd for all speeds lower than the new top speed
1290e751525SEric Saxe * to point to the new top speed.
1300e751525SEric Saxe * - Updating the framework with a new "normal" (maximum power) for this
1310e751525SEric Saxe * device.
1320e751525SEric Saxe */
1330e751525SEric Saxe void
cpudrv_set_topspeed(void * ctx,int plat_level)1340e751525SEric Saxe cpudrv_set_topspeed(void *ctx, int plat_level)
1350e751525SEric Saxe {
1360e751525SEric Saxe cpudrv_devstate_t *cpudsp;
1370e751525SEric Saxe cpudrv_pm_t *cpupm;
1380e751525SEric Saxe cpudrv_pm_spd_t *spd;
1390e751525SEric Saxe cpudrv_pm_spd_t *top_spd;
1400e751525SEric Saxe dev_info_t *dip;
1410e751525SEric Saxe int pm_level;
1420e751525SEric Saxe int instance;
1430e751525SEric Saxe int i;
1440e751525SEric Saxe
145*584b574aSToomas Soome top_spd = NULL;
1460e751525SEric Saxe dip = ctx;
1470e751525SEric Saxe instance = ddi_get_instance(dip);
1480e751525SEric Saxe cpudsp = ddi_get_soft_state(cpudrv_state, instance);
1490e751525SEric Saxe ASSERT(cpudsp != NULL);
1500e751525SEric Saxe
1510e751525SEric Saxe mutex_enter(&cpudsp->lock);
1520e751525SEric Saxe cpupm = &(cpudsp->cpudrv_pm);
1530e751525SEric Saxe pm_level = PLAT_2_PM_LEVEL(cpupm, plat_level);
1540e751525SEric Saxe for (i = 0, spd = cpupm->head_spd; spd; i++, spd = spd->down_spd) {
1550e751525SEric Saxe /*
1560e751525SEric Saxe * Don't mess with speeds that are higher than the new
1570e751525SEric Saxe * top speed. They should be out of range anyway.
1580e751525SEric Saxe */
1590e751525SEric Saxe if (spd->pm_level > pm_level)
1600e751525SEric Saxe continue;
1610e751525SEric Saxe /*
1620e751525SEric Saxe * This is the new top speed.
1630e751525SEric Saxe */
1640e751525SEric Saxe if (spd->pm_level == pm_level)
1650e751525SEric Saxe top_spd = spd;
1660e751525SEric Saxe
1670e751525SEric Saxe spd->up_spd = top_spd;
1680e751525SEric Saxe }
1690e751525SEric Saxe cpupm->top_spd = top_spd;
1700e751525SEric Saxe
1710e751525SEric Saxe cpupm->pm_governor_thread = curthread;
1720e751525SEric Saxe
1730e751525SEric Saxe mutex_exit(&cpudsp->lock);
1740e751525SEric Saxe
1750e751525SEric Saxe (void) pm_update_maxpower(dip, 0, top_spd->pm_level);
1760e751525SEric Saxe }
1770e751525SEric Saxe
1780e751525SEric Saxe /*
1790e751525SEric Saxe * This routine reads the ACPI _PPC object. It's accessed as a callback
1800e751525SEric Saxe * by the ppm driver whenever a _PPC change notification is received.
1810e751525SEric Saxe */
1820e751525SEric Saxe int
cpudrv_get_topspeed(void * ctx)1830e751525SEric Saxe cpudrv_get_topspeed(void *ctx)
1840e751525SEric Saxe {
1850e751525SEric Saxe cpu_t *cp;
1860e751525SEric Saxe cpudrv_devstate_t *cpudsp;
1870e751525SEric Saxe dev_info_t *dip;
1880e751525SEric Saxe int instance;
1890e751525SEric Saxe int plat_level;
1900e751525SEric Saxe
1910e751525SEric Saxe dip = ctx;
1920e751525SEric Saxe instance = ddi_get_instance(dip);
1930e751525SEric Saxe cpudsp = ddi_get_soft_state(cpudrv_state, instance);
1940e751525SEric Saxe ASSERT(cpudsp != NULL);
1950e751525SEric Saxe cp = cpudsp->cp;
1960e751525SEric Saxe plat_level = cpupm_get_top_speed(cp);
1970e751525SEric Saxe
1980e751525SEric Saxe return (plat_level);
1990e751525SEric Saxe }
2000e751525SEric Saxe
2010e751525SEric Saxe
2020e751525SEric Saxe /*
2030e751525SEric Saxe * This notification handler is called whenever the ACPI _PPC
2040e751525SEric Saxe * object changes. The _PPC is a sort of governor on power levels.
2050e751525SEric Saxe * It sets an upper threshold on which, _PSS defined, power levels
2060e751525SEric Saxe * are usuable. The _PPC value is dynamic and may change as properties
2070e751525SEric Saxe * (i.e., thermal or AC source) of the system change.
2080e751525SEric Saxe */
2090e751525SEric Saxe /* ARGSUSED */
2100e751525SEric Saxe static void
cpudrv_notify_handler(ACPI_HANDLE obj,UINT32 val,void * ctx)2110e751525SEric Saxe cpudrv_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
2120e751525SEric Saxe {
213d218c8f0SMark Haywood cpu_t *cp;
214d218c8f0SMark Haywood cpupm_mach_state_t *mach_state;
215d218c8f0SMark Haywood cpudrv_devstate_t *cpudsp;
216d218c8f0SMark Haywood dev_info_t *dip;
217d218c8f0SMark Haywood int instance;
2180e751525SEric Saxe extern pm_cpupm_t cpupm;
2190e751525SEric Saxe
220d218c8f0SMark Haywood dip = ctx;
221d218c8f0SMark Haywood instance = ddi_get_instance(dip);
222d218c8f0SMark Haywood cpudsp = ddi_get_soft_state(cpudrv_state, instance);
223d218c8f0SMark Haywood if (cpudsp == NULL)
224d218c8f0SMark Haywood return;
225d218c8f0SMark Haywood cp = cpudsp->cp;
226d218c8f0SMark Haywood mach_state = (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
227d218c8f0SMark Haywood if (mach_state == NULL)
228d218c8f0SMark Haywood return;
229d218c8f0SMark Haywood
2300e751525SEric Saxe /*
2310e751525SEric Saxe * We only handle _PPC change notifications.
2320e751525SEric Saxe */
233d218c8f0SMark Haywood if (!PM_EVENT_CPUPM && val == CPUPM_PPC_CHANGE_NOTIFICATION &&
234d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_P_STATES)
2350e751525SEric Saxe cpudrv_redefine_topspeed(ctx);
2360e751525SEric Saxe }
2370e751525SEric Saxe
2380e751525SEric Saxe void
cpudrv_install_notify_handler(cpudrv_devstate_t * cpudsp)2390e751525SEric Saxe cpudrv_install_notify_handler(cpudrv_devstate_t *cpudsp)
2400e751525SEric Saxe {
2410e751525SEric Saxe cpu_t *cp = cpudsp->cp;
2420e751525SEric Saxe cpupm_add_notify_handler(cp, cpudrv_notify_handler,
2430e751525SEric Saxe cpudsp->dip);
2440e751525SEric Saxe }
2450e751525SEric Saxe
2460e751525SEric Saxe void
cpudrv_uninstall_notify_handler(cpudrv_devstate_t * cpudsp)247444f66e7SMark Haywood cpudrv_uninstall_notify_handler(cpudrv_devstate_t *cpudsp)
248444f66e7SMark Haywood {
249444f66e7SMark Haywood cpu_t *cp = cpudsp->cp;
250444f66e7SMark Haywood cpupm_notification_t *entry, **next;
251444f66e7SMark Haywood
252444f66e7SMark Haywood ASSERT(cp != NULL);
253444f66e7SMark Haywood cpupm_mach_state_t *mach_state =
254444f66e7SMark Haywood (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
255444f66e7SMark Haywood
256444f66e7SMark Haywood mutex_enter(&mach_state->ms_lock);
257444f66e7SMark Haywood if (mach_state->ms_handlers == NULL) {
258444f66e7SMark Haywood mutex_exit(&mach_state->ms_lock);
259444f66e7SMark Haywood return;
260444f66e7SMark Haywood }
261444f66e7SMark Haywood
262444f66e7SMark Haywood for (next = &mach_state->ms_handlers; (entry = *next) != NULL; ) {
263444f66e7SMark Haywood if (entry->nq_handler != cpudrv_notify_handler) {
264444f66e7SMark Haywood next = &entry->nq_next;
265444f66e7SMark Haywood continue;
266444f66e7SMark Haywood }
267444f66e7SMark Haywood *next = entry->nq_next;
268444f66e7SMark Haywood kmem_free(entry, sizeof (cpupm_notification_t));
269444f66e7SMark Haywood }
270444f66e7SMark Haywood mutex_exit(&mach_state->ms_lock);
271444f66e7SMark Haywood }
272444f66e7SMark Haywood
273444f66e7SMark Haywood void
cpudrv_redefine_topspeed(void * ctx)2740e751525SEric Saxe cpudrv_redefine_topspeed(void *ctx)
2750e751525SEric Saxe {
2760e751525SEric Saxe /*
2770e751525SEric Saxe * This should never happen, unless ppm does not get loaded.
2780e751525SEric Saxe */
2790e751525SEric Saxe if (cpupm_redefine_topspeed == NULL) {
2800e751525SEric Saxe cmn_err(CE_WARN, "cpudrv_redefine_topspeed: "
2810e751525SEric Saxe "cpupm_redefine_topspeed has not been initialized - "
2820e751525SEric Saxe "ignoring notification");
2830e751525SEric Saxe return;
2840e751525SEric Saxe }
2850e751525SEric Saxe
2860e751525SEric Saxe /*
2870e751525SEric Saxe * ppm callback needs to handle redefinition for all CPUs in
2880e751525SEric Saxe * the domain.
2890e751525SEric Saxe */
2900e751525SEric Saxe (*cpupm_redefine_topspeed)(ctx);
2910e751525SEric Saxe }
2920e751525SEric Saxe
2930e751525SEric Saxe boolean_t
cpudrv_mach_init(cpudrv_devstate_t * cpudsp)2940e751525SEric Saxe cpudrv_mach_init(cpudrv_devstate_t *cpudsp)
2950e751525SEric Saxe {
2960e751525SEric Saxe cpupm_mach_state_t *mach_state;
297444f66e7SMark Haywood int topspeed;
2980e751525SEric Saxe
299444f66e7SMark Haywood ASSERT(cpudsp->cp);
3000e751525SEric Saxe
3010e751525SEric Saxe mach_state = (cpupm_mach_state_t *)
3020e751525SEric Saxe (cpudsp->cp->cpu_m.mcpu_pm_mach_state);
3030e751525SEric Saxe mach_state->ms_dip = cpudsp->dip;
304444f66e7SMark Haywood /*
305444f66e7SMark Haywood * allocate ppm CPU domain and initialize the topspeed
306444f66e7SMark Haywood * only if P-states are enabled.
307444f66e7SMark Haywood */
308444f66e7SMark Haywood if (cpudrv_power_ready(cpudsp->cp)) {
309444f66e7SMark Haywood (*cpupm_ppm_alloc_pstate_domains)(cpudsp->cp);
310444f66e7SMark Haywood topspeed = cpudrv_get_topspeed(cpudsp->dip);
311444f66e7SMark Haywood cpudrv_set_topspeed(cpudsp->dip, topspeed);
312444f66e7SMark Haywood }
313444f66e7SMark Haywood
314444f66e7SMark Haywood return (B_TRUE);
315444f66e7SMark Haywood }
316444f66e7SMark Haywood
317444f66e7SMark Haywood boolean_t
cpudrv_mach_fini(cpudrv_devstate_t * cpudsp)318444f66e7SMark Haywood cpudrv_mach_fini(cpudrv_devstate_t *cpudsp)
319444f66e7SMark Haywood {
320444f66e7SMark Haywood /*
321444f66e7SMark Haywood * return TRUE if cpu pointer is NULL
322444f66e7SMark Haywood */
323444f66e7SMark Haywood if (cpudsp->cp == NULL)
324444f66e7SMark Haywood return (B_TRUE);
325444f66e7SMark Haywood /*
326444f66e7SMark Haywood * free ppm cpu pstate domains only if
327444f66e7SMark Haywood * P-states are enabled
328444f66e7SMark Haywood */
329444f66e7SMark Haywood if (cpudrv_power_ready(cpudsp->cp)) {
330444f66e7SMark Haywood (*cpupm_ppm_free_pstate_domains)(cpudsp->cp);
331444f66e7SMark Haywood }
332444f66e7SMark Haywood
3330e751525SEric Saxe return (B_TRUE);
3340e751525SEric Saxe }
3350e751525SEric Saxe
3360e751525SEric Saxe uint_t
cpudrv_get_speeds(cpudrv_devstate_t * cpudsp,int ** speeds)3370e751525SEric Saxe cpudrv_get_speeds(cpudrv_devstate_t *cpudsp, int **speeds)
3380e751525SEric Saxe {
3396af9d452Saubrey.li@intel.com /*
3406af9d452Saubrey.li@intel.com * return nspeeds = 0 if can't get cpu_t
3416af9d452Saubrey.li@intel.com */
3426af9d452Saubrey.li@intel.com if (cpudrv_get_cpu(cpudsp) != DDI_SUCCESS)
3436af9d452Saubrey.li@intel.com return (0);
3446af9d452Saubrey.li@intel.com
3450e751525SEric Saxe return (cpupm_get_speeds(cpudsp->cp, speeds));
3460e751525SEric Saxe }
3470e751525SEric Saxe
3480e751525SEric Saxe void
cpudrv_free_speeds(int * speeds,uint_t nspeeds)3490e751525SEric Saxe cpudrv_free_speeds(int *speeds, uint_t nspeeds)
3500e751525SEric Saxe {
3510e751525SEric Saxe cpupm_free_speeds(speeds, nspeeds);
3520e751525SEric Saxe }
3530e751525SEric Saxe
3540e751525SEric Saxe boolean_t
cpudrv_power_ready(cpu_t * cp)355444f66e7SMark Haywood cpudrv_power_ready(cpu_t *cp)
3560e751525SEric Saxe {
357444f66e7SMark Haywood return (cpupm_power_ready(cp));
3580e751525SEric Saxe }
3590e751525SEric Saxe
3600e751525SEric Saxe /* ARGSUSED */
3610e751525SEric Saxe void
cpudrv_set_supp_freqs(cpudrv_devstate_t * cpudsp)3620e751525SEric Saxe cpudrv_set_supp_freqs(cpudrv_devstate_t *cpudsp)
3630e751525SEric Saxe {
3640e751525SEric Saxe }
365