10e751525SEric Saxe /*
20e751525SEric Saxe * CDDL HEADER START
30e751525SEric Saxe *
40e751525SEric Saxe * The contents of this file are subject to the terms of the
50e751525SEric Saxe * Common Development and Distribution License (the "License").
60e751525SEric Saxe * You may not use this file except in compliance with the License.
70e751525SEric Saxe *
80e751525SEric Saxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90e751525SEric Saxe * or http://www.opensolaris.org/os/licensing.
100e751525SEric Saxe * See the License for the specific language governing permissions
110e751525SEric Saxe * and limitations under the License.
120e751525SEric Saxe *
130e751525SEric Saxe * When distributing Covered Code, include this CDDL HEADER in each
140e751525SEric Saxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150e751525SEric Saxe * If applicable, add the following below this CDDL HEADER, with the
160e751525SEric Saxe * fields enclosed by brackets "[]" replaced with your own identifying
170e751525SEric Saxe * information: Portions Copyright [yyyy] [name of copyright owner]
180e751525SEric Saxe *
190e751525SEric Saxe * CDDL HEADER END
200e751525SEric Saxe */
210e751525SEric Saxe /*
220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
230e751525SEric Saxe * Use is subject to license terms.
240e751525SEric Saxe */
25444f66e7SMark Haywood /*
26444f66e7SMark Haywood * Copyright (c) 2009, Intel Corporation.
27444f66e7SMark Haywood * All Rights Reserved.
28444f66e7SMark Haywood */
290e751525SEric Saxe
300e751525SEric Saxe /*
310e751525SEric Saxe * CPU power management driver support for i86pc.
320e751525SEric Saxe */
330e751525SEric Saxe
340e751525SEric Saxe #include <sys/ddi.h>
350e751525SEric Saxe #include <sys/sunddi.h>
360e751525SEric Saxe #include <sys/cpupm.h>
370e751525SEric Saxe #include <sys/cpudrv_mach.h>
380e751525SEric Saxe #include <sys/machsystm.h>
390e751525SEric Saxe #include <sys/cpu_pm.h>
400e751525SEric Saxe #include <sys/cpuvar.h>
410e751525SEric Saxe #include <sys/sdt.h>
420e751525SEric Saxe #include <sys/cpu_idle.h>
430e751525SEric Saxe
/*
 * Note that our driver numbers the power levels from lowest to
 * highest starting at 1 (i.e., the lowest power level is 1 and
 * the highest power level is cpupm->num_spd). The x86 modules get
 * their power levels from ACPI which numbers power levels from
 * highest to lowest starting at 0 (i.e., the lowest power level
 * is (cpupm->num_spd - 1) and the highest power level is 0). So to
 * map one of our driver power levels to one understood by ACPI we
 * simply subtract our driver power level from cpupm->num_spd. Likewise,
 * to map an ACPI power level to the proper driver power level, we
 * subtract the ACPI power level from cpupm->num_spd.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a - b" or "&state") expand with the intended precedence.
 */
#define	PM_2_PLAT_LEVEL(cpupm, pm_level) ((cpupm)->num_spd - (pm_level))
#define	PLAT_2_PM_LEVEL(cpupm, plat_level) ((cpupm)->num_spd - (plat_level))
580e751525SEric Saxe
590e751525SEric Saxe /*
600e751525SEric Saxe * Change CPU speed using interface provided by module.
610e751525SEric Saxe */
620e751525SEric Saxe int
cpudrv_change_speed(cpudrv_devstate_t * cpudsp,cpudrv_pm_spd_t * new_spd)630e751525SEric Saxe cpudrv_change_speed(cpudrv_devstate_t *cpudsp, cpudrv_pm_spd_t *new_spd)
640e751525SEric Saxe {
650e751525SEric Saxe cpu_t *cp = cpudsp->cp;
660e751525SEric Saxe cpupm_mach_state_t *mach_state =
670e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
680e751525SEric Saxe cpudrv_pm_t *cpupm;
690e751525SEric Saxe cpuset_t set;
700e751525SEric Saxe uint32_t plat_level;
710e751525SEric Saxe
720e751525SEric Saxe if (!(mach_state->ms_caps & CPUPM_P_STATES))
730e751525SEric Saxe return (DDI_FAILURE);
740e751525SEric Saxe ASSERT(mach_state->ms_pstate.cma_ops != NULL);
750e751525SEric Saxe cpupm = &(cpudsp->cpudrv_pm);
760e751525SEric Saxe plat_level = PM_2_PLAT_LEVEL(cpupm, new_spd->pm_level);
770e751525SEric Saxe CPUSET_ONLY(set, cp->cpu_id);
780e751525SEric Saxe mach_state->ms_pstate.cma_ops->cpus_change(set, plat_level);
790e751525SEric Saxe
800e751525SEric Saxe return (DDI_SUCCESS);
810e751525SEric Saxe }
820e751525SEric Saxe
830e751525SEric Saxe /*
840e751525SEric Saxe * Determine the cpu_id for the CPU device.
850e751525SEric Saxe */
860e751525SEric Saxe boolean_t
cpudrv_get_cpu_id(dev_info_t * dip,processorid_t * cpu_id)870e751525SEric Saxe cpudrv_get_cpu_id(dev_info_t *dip, processorid_t *cpu_id)
880e751525SEric Saxe {
890e751525SEric Saxe return ((*cpu_id = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
900e751525SEric Saxe DDI_PROP_DONTPASS, "reg", -1)) != -1);
910e751525SEric Saxe
920e751525SEric Saxe }
930e751525SEric Saxe
940e751525SEric Saxe boolean_t
cpudrv_is_enabled(cpudrv_devstate_t * cpudsp)950e751525SEric Saxe cpudrv_is_enabled(cpudrv_devstate_t *cpudsp)
960e751525SEric Saxe {
970e751525SEric Saxe cpupm_mach_state_t *mach_state;
980e751525SEric Saxe
990e751525SEric Saxe if (!cpupm_is_enabled(CPUPM_P_STATES) || !cpudrv_enabled)
1000e751525SEric Saxe return (B_FALSE);
1010e751525SEric Saxe
1020e751525SEric Saxe /*
1030e751525SEric Saxe * Only check the instance specific setting it exists.
1040e751525SEric Saxe */
1050e751525SEric Saxe if (cpudsp != NULL && cpudsp->cp != NULL &&
1060e751525SEric Saxe cpudsp->cp->cpu_m.mcpu_pm_mach_state != NULL) {
1070e751525SEric Saxe mach_state =
1080e751525SEric Saxe (cpupm_mach_state_t *)cpudsp->cp->cpu_m.mcpu_pm_mach_state;
1090e751525SEric Saxe return (mach_state->ms_caps & CPUPM_P_STATES);
1100e751525SEric Saxe }
1110e751525SEric Saxe
1120e751525SEric Saxe return (B_TRUE);
1130e751525SEric Saxe }
1140e751525SEric Saxe
1150e751525SEric Saxe /*
1160e751525SEric Saxe * Is the current thread the thread that is handling the
1170e751525SEric Saxe * PPC change notification?
1180e751525SEric Saxe */
1190e751525SEric Saxe boolean_t
cpudrv_is_governor_thread(cpudrv_pm_t * cpupm)1200e751525SEric Saxe cpudrv_is_governor_thread(cpudrv_pm_t *cpupm)
1210e751525SEric Saxe {
1220e751525SEric Saxe return (curthread == cpupm->pm_governor_thread);
1230e751525SEric Saxe }
1240e751525SEric Saxe
1250e751525SEric Saxe /*
1260e751525SEric Saxe * This routine changes the top speed to which the CPUs can transition by:
1270e751525SEric Saxe *
1280e751525SEric Saxe * - Resetting the up_spd for all speeds lower than the new top speed
1290e751525SEric Saxe * to point to the new top speed.
1300e751525SEric Saxe * - Updating the framework with a new "normal" (maximum power) for this
1310e751525SEric Saxe * device.
1320e751525SEric Saxe */
1330e751525SEric Saxe void
cpudrv_set_topspeed(void * ctx,int plat_level)1340e751525SEric Saxe cpudrv_set_topspeed(void *ctx, int plat_level)
1350e751525SEric Saxe {
1360e751525SEric Saxe cpudrv_devstate_t *cpudsp;
1370e751525SEric Saxe cpudrv_pm_t *cpupm;
1380e751525SEric Saxe cpudrv_pm_spd_t *spd;
1390e751525SEric Saxe cpudrv_pm_spd_t *top_spd;
1400e751525SEric Saxe dev_info_t *dip;
1410e751525SEric Saxe int pm_level;
1420e751525SEric Saxe int instance;
1430e751525SEric Saxe int i;
1440e751525SEric Saxe
1450e751525SEric Saxe dip = ctx;
1460e751525SEric Saxe instance = ddi_get_instance(dip);
1470e751525SEric Saxe cpudsp = ddi_get_soft_state(cpudrv_state, instance);
1480e751525SEric Saxe ASSERT(cpudsp != NULL);
1490e751525SEric Saxe
1500e751525SEric Saxe mutex_enter(&cpudsp->lock);
1510e751525SEric Saxe cpupm = &(cpudsp->cpudrv_pm);
1520e751525SEric Saxe pm_level = PLAT_2_PM_LEVEL(cpupm, plat_level);
1530e751525SEric Saxe for (i = 0, spd = cpupm->head_spd; spd; i++, spd = spd->down_spd) {
1540e751525SEric Saxe /*
1550e751525SEric Saxe * Don't mess with speeds that are higher than the new
1560e751525SEric Saxe * top speed. They should be out of range anyway.
1570e751525SEric Saxe */
1580e751525SEric Saxe if (spd->pm_level > pm_level)
1590e751525SEric Saxe continue;
1600e751525SEric Saxe /*
1610e751525SEric Saxe * This is the new top speed.
1620e751525SEric Saxe */
1630e751525SEric Saxe if (spd->pm_level == pm_level)
1640e751525SEric Saxe top_spd = spd;
1650e751525SEric Saxe
1660e751525SEric Saxe spd->up_spd = top_spd;
1670e751525SEric Saxe }
1680e751525SEric Saxe cpupm->top_spd = top_spd;
1690e751525SEric Saxe
1700e751525SEric Saxe cpupm->pm_governor_thread = curthread;
1710e751525SEric Saxe
1720e751525SEric Saxe mutex_exit(&cpudsp->lock);
1730e751525SEric Saxe
1740e751525SEric Saxe (void) pm_update_maxpower(dip, 0, top_spd->pm_level);
1750e751525SEric Saxe }
1760e751525SEric Saxe
1770e751525SEric Saxe /*
1780e751525SEric Saxe * This routine reads the ACPI _PPC object. It's accessed as a callback
1790e751525SEric Saxe * by the ppm driver whenever a _PPC change notification is received.
1800e751525SEric Saxe */
1810e751525SEric Saxe int
cpudrv_get_topspeed(void * ctx)1820e751525SEric Saxe cpudrv_get_topspeed(void *ctx)
1830e751525SEric Saxe {
1840e751525SEric Saxe cpu_t *cp;
1850e751525SEric Saxe cpudrv_devstate_t *cpudsp;
1860e751525SEric Saxe dev_info_t *dip;
1870e751525SEric Saxe int instance;
1880e751525SEric Saxe int plat_level;
1890e751525SEric Saxe
1900e751525SEric Saxe dip = ctx;
1910e751525SEric Saxe instance = ddi_get_instance(dip);
1920e751525SEric Saxe cpudsp = ddi_get_soft_state(cpudrv_state, instance);
1930e751525SEric Saxe ASSERT(cpudsp != NULL);
1940e751525SEric Saxe cp = cpudsp->cp;
1950e751525SEric Saxe plat_level = cpupm_get_top_speed(cp);
1960e751525SEric Saxe
1970e751525SEric Saxe return (plat_level);
1980e751525SEric Saxe }
1990e751525SEric Saxe
2000e751525SEric Saxe
2010e751525SEric Saxe /*
2020e751525SEric Saxe * This notification handler is called whenever the ACPI _PPC
2030e751525SEric Saxe * object changes. The _PPC is a sort of governor on power levels.
2040e751525SEric Saxe * It sets an upper threshold on which, _PSS defined, power levels
2050e751525SEric Saxe * are usuable. The _PPC value is dynamic and may change as properties
2060e751525SEric Saxe * (i.e., thermal or AC source) of the system change.
2070e751525SEric Saxe */
2080e751525SEric Saxe /* ARGSUSED */
2090e751525SEric Saxe static void
cpudrv_notify_handler(ACPI_HANDLE obj,UINT32 val,void * ctx)2100e751525SEric Saxe cpudrv_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
2110e751525SEric Saxe {
212d218c8f0SMark Haywood cpu_t *cp;
213d218c8f0SMark Haywood cpupm_mach_state_t *mach_state;
214d218c8f0SMark Haywood cpudrv_devstate_t *cpudsp;
215d218c8f0SMark Haywood dev_info_t *dip;
216d218c8f0SMark Haywood int instance;
2170e751525SEric Saxe extern pm_cpupm_t cpupm;
2180e751525SEric Saxe
219d218c8f0SMark Haywood dip = ctx;
220d218c8f0SMark Haywood instance = ddi_get_instance(dip);
221d218c8f0SMark Haywood cpudsp = ddi_get_soft_state(cpudrv_state, instance);
222d218c8f0SMark Haywood if (cpudsp == NULL)
223d218c8f0SMark Haywood return;
224d218c8f0SMark Haywood cp = cpudsp->cp;
225d218c8f0SMark Haywood mach_state = (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
226d218c8f0SMark Haywood if (mach_state == NULL)
227d218c8f0SMark Haywood return;
228d218c8f0SMark Haywood
2290e751525SEric Saxe /*
2300e751525SEric Saxe * We only handle _PPC change notifications.
2310e751525SEric Saxe */
232d218c8f0SMark Haywood if (!PM_EVENT_CPUPM && val == CPUPM_PPC_CHANGE_NOTIFICATION &&
233d218c8f0SMark Haywood mach_state->ms_caps & CPUPM_P_STATES)
2340e751525SEric Saxe cpudrv_redefine_topspeed(ctx);
2350e751525SEric Saxe }
2360e751525SEric Saxe
2370e751525SEric Saxe void
cpudrv_install_notify_handler(cpudrv_devstate_t * cpudsp)2380e751525SEric Saxe cpudrv_install_notify_handler(cpudrv_devstate_t *cpudsp)
2390e751525SEric Saxe {
2400e751525SEric Saxe cpu_t *cp = cpudsp->cp;
2410e751525SEric Saxe cpupm_add_notify_handler(cp, cpudrv_notify_handler,
2420e751525SEric Saxe cpudsp->dip);
2430e751525SEric Saxe }
2440e751525SEric Saxe
2450e751525SEric Saxe void
cpudrv_uninstall_notify_handler(cpudrv_devstate_t * cpudsp)246444f66e7SMark Haywood cpudrv_uninstall_notify_handler(cpudrv_devstate_t *cpudsp)
247444f66e7SMark Haywood {
248444f66e7SMark Haywood cpu_t *cp = cpudsp->cp;
249444f66e7SMark Haywood cpupm_notification_t *entry, **next;
250444f66e7SMark Haywood
251444f66e7SMark Haywood ASSERT(cp != NULL);
252444f66e7SMark Haywood cpupm_mach_state_t *mach_state =
253444f66e7SMark Haywood (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
254444f66e7SMark Haywood
255444f66e7SMark Haywood mutex_enter(&mach_state->ms_lock);
256444f66e7SMark Haywood if (mach_state->ms_handlers == NULL) {
257444f66e7SMark Haywood mutex_exit(&mach_state->ms_lock);
258444f66e7SMark Haywood return;
259444f66e7SMark Haywood }
260444f66e7SMark Haywood
261444f66e7SMark Haywood for (next = &mach_state->ms_handlers; (entry = *next) != NULL; ) {
262444f66e7SMark Haywood if (entry->nq_handler != cpudrv_notify_handler) {
263444f66e7SMark Haywood next = &entry->nq_next;
264444f66e7SMark Haywood continue;
265444f66e7SMark Haywood }
266444f66e7SMark Haywood *next = entry->nq_next;
267444f66e7SMark Haywood kmem_free(entry, sizeof (cpupm_notification_t));
268444f66e7SMark Haywood }
269444f66e7SMark Haywood mutex_exit(&mach_state->ms_lock);
270444f66e7SMark Haywood }
271444f66e7SMark Haywood
272444f66e7SMark Haywood void
cpudrv_redefine_topspeed(void * ctx)2730e751525SEric Saxe cpudrv_redefine_topspeed(void *ctx)
2740e751525SEric Saxe {
2750e751525SEric Saxe /*
2760e751525SEric Saxe * This should never happen, unless ppm does not get loaded.
2770e751525SEric Saxe */
2780e751525SEric Saxe if (cpupm_redefine_topspeed == NULL) {
2790e751525SEric Saxe cmn_err(CE_WARN, "cpudrv_redefine_topspeed: "
2800e751525SEric Saxe "cpupm_redefine_topspeed has not been initialized - "
2810e751525SEric Saxe "ignoring notification");
2820e751525SEric Saxe return;
2830e751525SEric Saxe }
2840e751525SEric Saxe
2850e751525SEric Saxe /*
2860e751525SEric Saxe * ppm callback needs to handle redefinition for all CPUs in
2870e751525SEric Saxe * the domain.
2880e751525SEric Saxe */
2890e751525SEric Saxe (*cpupm_redefine_topspeed)(ctx);
2900e751525SEric Saxe }
2910e751525SEric Saxe
2920e751525SEric Saxe boolean_t
cpudrv_mach_init(cpudrv_devstate_t * cpudsp)2930e751525SEric Saxe cpudrv_mach_init(cpudrv_devstate_t *cpudsp)
2940e751525SEric Saxe {
2950e751525SEric Saxe cpupm_mach_state_t *mach_state;
296444f66e7SMark Haywood int topspeed;
2970e751525SEric Saxe
298444f66e7SMark Haywood ASSERT(cpudsp->cp);
2990e751525SEric Saxe
3000e751525SEric Saxe mach_state = (cpupm_mach_state_t *)
3010e751525SEric Saxe (cpudsp->cp->cpu_m.mcpu_pm_mach_state);
3020e751525SEric Saxe mach_state->ms_dip = cpudsp->dip;
303444f66e7SMark Haywood /*
304444f66e7SMark Haywood * allocate ppm CPU domain and initialize the topspeed
305444f66e7SMark Haywood * only if P-states are enabled.
306444f66e7SMark Haywood */
307444f66e7SMark Haywood if (cpudrv_power_ready(cpudsp->cp)) {
308444f66e7SMark Haywood (*cpupm_ppm_alloc_pstate_domains)(cpudsp->cp);
309444f66e7SMark Haywood topspeed = cpudrv_get_topspeed(cpudsp->dip);
310444f66e7SMark Haywood cpudrv_set_topspeed(cpudsp->dip, topspeed);
311444f66e7SMark Haywood }
312444f66e7SMark Haywood
313444f66e7SMark Haywood return (B_TRUE);
314444f66e7SMark Haywood }
315444f66e7SMark Haywood
316444f66e7SMark Haywood boolean_t
cpudrv_mach_fini(cpudrv_devstate_t * cpudsp)317444f66e7SMark Haywood cpudrv_mach_fini(cpudrv_devstate_t *cpudsp)
318444f66e7SMark Haywood {
319444f66e7SMark Haywood /*
320444f66e7SMark Haywood * return TRUE if cpu pointer is NULL
321444f66e7SMark Haywood */
322444f66e7SMark Haywood if (cpudsp->cp == NULL)
323444f66e7SMark Haywood return (B_TRUE);
324444f66e7SMark Haywood /*
325444f66e7SMark Haywood * free ppm cpu pstate domains only if
326444f66e7SMark Haywood * P-states are enabled
327444f66e7SMark Haywood */
328444f66e7SMark Haywood if (cpudrv_power_ready(cpudsp->cp)) {
329444f66e7SMark Haywood (*cpupm_ppm_free_pstate_domains)(cpudsp->cp);
330444f66e7SMark Haywood }
331444f66e7SMark Haywood
3320e751525SEric Saxe return (B_TRUE);
3330e751525SEric Saxe }
3340e751525SEric Saxe
3350e751525SEric Saxe uint_t
cpudrv_get_speeds(cpudrv_devstate_t * cpudsp,int ** speeds)3360e751525SEric Saxe cpudrv_get_speeds(cpudrv_devstate_t *cpudsp, int **speeds)
3370e751525SEric Saxe {
338*6af9d452Saubrey.li@intel.com /*
339*6af9d452Saubrey.li@intel.com * return nspeeds = 0 if can't get cpu_t
340*6af9d452Saubrey.li@intel.com */
341*6af9d452Saubrey.li@intel.com if (cpudrv_get_cpu(cpudsp) != DDI_SUCCESS)
342*6af9d452Saubrey.li@intel.com return (0);
343*6af9d452Saubrey.li@intel.com
3440e751525SEric Saxe return (cpupm_get_speeds(cpudsp->cp, speeds));
3450e751525SEric Saxe }
3460e751525SEric Saxe
3470e751525SEric Saxe void
cpudrv_free_speeds(int * speeds,uint_t nspeeds)3480e751525SEric Saxe cpudrv_free_speeds(int *speeds, uint_t nspeeds)
3490e751525SEric Saxe {
3500e751525SEric Saxe cpupm_free_speeds(speeds, nspeeds);
3510e751525SEric Saxe }
3520e751525SEric Saxe
3530e751525SEric Saxe boolean_t
cpudrv_power_ready(cpu_t * cp)354444f66e7SMark Haywood cpudrv_power_ready(cpu_t *cp)
3550e751525SEric Saxe {
356444f66e7SMark Haywood return (cpupm_power_ready(cp));
3570e751525SEric Saxe }
3580e751525SEric Saxe
3590e751525SEric Saxe /* ARGSUSED */
3600e751525SEric Saxe void
cpudrv_set_supp_freqs(cpudrv_devstate_t * cpudsp)3610e751525SEric Saxe cpudrv_set_supp_freqs(cpudrv_devstate_t *cpudsp)
3620e751525SEric Saxe {
3630e751525SEric Saxe }
364