xref: /illumos-gate/usr/src/uts/i86pc/io/cpudrv_mach.c (revision 584b574a3b16c6772c8204ec1d1c957c56f22a87)
10e751525SEric Saxe /*
20e751525SEric Saxe  * CDDL HEADER START
30e751525SEric Saxe  *
40e751525SEric Saxe  * The contents of this file are subject to the terms of the
50e751525SEric Saxe  * Common Development and Distribution License (the "License").
60e751525SEric Saxe  * You may not use this file except in compliance with the License.
70e751525SEric Saxe  *
80e751525SEric Saxe  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90e751525SEric Saxe  * or http://www.opensolaris.org/os/licensing.
100e751525SEric Saxe  * See the License for the specific language governing permissions
110e751525SEric Saxe  * and limitations under the License.
120e751525SEric Saxe  *
130e751525SEric Saxe  * When distributing Covered Code, include this CDDL HEADER in each
140e751525SEric Saxe  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150e751525SEric Saxe  * If applicable, add the following below this CDDL HEADER, with the
160e751525SEric Saxe  * fields enclosed by brackets "[]" replaced with your own identifying
170e751525SEric Saxe  * information: Portions Copyright [yyyy] [name of copyright owner]
180e751525SEric Saxe  *
190e751525SEric Saxe  * CDDL HEADER END
200e751525SEric Saxe  */
210e751525SEric Saxe /*
220e751525SEric Saxe  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230e751525SEric Saxe  * Use is subject to license terms.
240e751525SEric Saxe  */
25444f66e7SMark Haywood /*
26444f66e7SMark Haywood  * Copyright (c) 2009,  Intel Corporation.
27444f66e7SMark Haywood  * All Rights Reserved.
28444f66e7SMark Haywood  */
290e751525SEric Saxe 
300e751525SEric Saxe /*
310e751525SEric Saxe  * CPU power management driver support for i86pc.
320e751525SEric Saxe  */
330e751525SEric Saxe 
340e751525SEric Saxe #include <sys/ddi.h>
350e751525SEric Saxe #include <sys/sunddi.h>
360e751525SEric Saxe #include <sys/cpupm.h>
370e751525SEric Saxe #include <sys/cpudrv_mach.h>
380e751525SEric Saxe #include <sys/machsystm.h>
390e751525SEric Saxe #include <sys/cpu_pm.h>
400e751525SEric Saxe #include <sys/cpuvar.h>
410e751525SEric Saxe #include <sys/sdt.h>
420e751525SEric Saxe #include <sys/cpu_idle.h>
430e751525SEric Saxe 
/*
 * Note that our driver numbers the power levels from lowest to
 * highest starting at 1 (i.e., the lowest power level is 1 and
 * the highest power level is cpupm->num_spd). The x86 modules get
 * their power levels from ACPI which numbers power levels from
 * highest to lowest starting at 0 (i.e., the lowest power level
 * is (cpupm->num_spd - 1) and the highest power level is 0). So to
 * map one of our driver power levels to one understood by ACPI we
 * simply subtract our driver power level from cpupm->num_spd. Likewise,
 * to map an ACPI power level to the proper driver power level, we
 * subtract the ACPI power level from cpupm->num_spd.
 *
 * Both arguments are parenthesized in the expansion so that callers may
 * pass arbitrary expressions (e.g. "a - b" or "&pm") without the result
 * being re-associated by operator precedence.
 */
#define	PM_2_PLAT_LEVEL(cpupm, pm_level) ((cpupm)->num_spd - (pm_level))
#define	PLAT_2_PM_LEVEL(cpupm, plat_level) ((cpupm)->num_spd - (plat_level))
580e751525SEric Saxe 
590e751525SEric Saxe /*
600e751525SEric Saxe  * Change CPU speed using interface provided by module.
610e751525SEric Saxe  */
620e751525SEric Saxe int
cpudrv_change_speed(cpudrv_devstate_t * cpudsp,cpudrv_pm_spd_t * new_spd)630e751525SEric Saxe cpudrv_change_speed(cpudrv_devstate_t *cpudsp, cpudrv_pm_spd_t *new_spd)
640e751525SEric Saxe {
650e751525SEric Saxe 	cpu_t *cp = cpudsp->cp;
660e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
670e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
680e751525SEric Saxe 	cpudrv_pm_t *cpupm;
690e751525SEric Saxe 	cpuset_t set;
700e751525SEric Saxe 	uint32_t plat_level;
710e751525SEric Saxe 
720e751525SEric Saxe 	if (!(mach_state->ms_caps & CPUPM_P_STATES))
730e751525SEric Saxe 		return (DDI_FAILURE);
740e751525SEric Saxe 	ASSERT(mach_state->ms_pstate.cma_ops != NULL);
750e751525SEric Saxe 	cpupm = &(cpudsp->cpudrv_pm);
760e751525SEric Saxe 	plat_level = PM_2_PLAT_LEVEL(cpupm, new_spd->pm_level);
770e751525SEric Saxe 	CPUSET_ONLY(set, cp->cpu_id);
780e751525SEric Saxe 	mach_state->ms_pstate.cma_ops->cpus_change(set, plat_level);
790e751525SEric Saxe 
800e751525SEric Saxe 	return (DDI_SUCCESS);
810e751525SEric Saxe }
820e751525SEric Saxe 
830e751525SEric Saxe /*
840e751525SEric Saxe  * Determine the cpu_id for the CPU device.
850e751525SEric Saxe  */
860e751525SEric Saxe boolean_t
cpudrv_get_cpu_id(dev_info_t * dip,processorid_t * cpu_id)870e751525SEric Saxe cpudrv_get_cpu_id(dev_info_t *dip,  processorid_t *cpu_id)
880e751525SEric Saxe {
890e751525SEric Saxe 	return ((*cpu_id = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
900e751525SEric Saxe 	    DDI_PROP_DONTPASS, "reg", -1)) != -1);
910e751525SEric Saxe 
920e751525SEric Saxe }
930e751525SEric Saxe 
940e751525SEric Saxe boolean_t
cpudrv_is_enabled(cpudrv_devstate_t * cpudsp)950e751525SEric Saxe cpudrv_is_enabled(cpudrv_devstate_t *cpudsp)
960e751525SEric Saxe {
970e751525SEric Saxe 	cpupm_mach_state_t *mach_state;
980e751525SEric Saxe 
990e751525SEric Saxe 	if (!cpupm_is_enabled(CPUPM_P_STATES) || !cpudrv_enabled)
1000e751525SEric Saxe 		return (B_FALSE);
1010e751525SEric Saxe 
1020e751525SEric Saxe 	/*
1030e751525SEric Saxe 	 * Only check the instance specific setting it exists.
1040e751525SEric Saxe 	 */
1050e751525SEric Saxe 	if (cpudsp != NULL && cpudsp->cp != NULL &&
1060e751525SEric Saxe 	    cpudsp->cp->cpu_m.mcpu_pm_mach_state != NULL) {
1070e751525SEric Saxe 		mach_state =
1080e751525SEric Saxe 		    (cpupm_mach_state_t *)cpudsp->cp->cpu_m.mcpu_pm_mach_state;
1090e751525SEric Saxe 		return (mach_state->ms_caps & CPUPM_P_STATES);
1100e751525SEric Saxe 	}
1110e751525SEric Saxe 
1120e751525SEric Saxe 	return (B_TRUE);
1130e751525SEric Saxe }
1140e751525SEric Saxe 
1150e751525SEric Saxe /*
1160e751525SEric Saxe  * Is the current thread the thread that is handling the
1170e751525SEric Saxe  * PPC change notification?
1180e751525SEric Saxe  */
1190e751525SEric Saxe boolean_t
cpudrv_is_governor_thread(cpudrv_pm_t * cpupm)1200e751525SEric Saxe cpudrv_is_governor_thread(cpudrv_pm_t *cpupm)
1210e751525SEric Saxe {
1220e751525SEric Saxe 	return (curthread == cpupm->pm_governor_thread);
1230e751525SEric Saxe }
1240e751525SEric Saxe 
1250e751525SEric Saxe /*
1260e751525SEric Saxe  * This routine changes the top speed to which the CPUs can transition by:
1270e751525SEric Saxe  *
1280e751525SEric Saxe  * - Resetting the up_spd for all speeds lower than the new top speed
1290e751525SEric Saxe  *   to point to the new top speed.
1300e751525SEric Saxe  * - Updating the framework with a new "normal" (maximum power) for this
1310e751525SEric Saxe  *   device.
1320e751525SEric Saxe  */
1330e751525SEric Saxe void
cpudrv_set_topspeed(void * ctx,int plat_level)1340e751525SEric Saxe cpudrv_set_topspeed(void *ctx, int plat_level)
1350e751525SEric Saxe {
1360e751525SEric Saxe 	cpudrv_devstate_t *cpudsp;
1370e751525SEric Saxe 	cpudrv_pm_t *cpupm;
1380e751525SEric Saxe 	cpudrv_pm_spd_t	*spd;
1390e751525SEric Saxe 	cpudrv_pm_spd_t	*top_spd;
1400e751525SEric Saxe 	dev_info_t *dip;
1410e751525SEric Saxe 	int pm_level;
1420e751525SEric Saxe 	int instance;
1430e751525SEric Saxe 	int i;
1440e751525SEric Saxe 
145*584b574aSToomas Soome 	top_spd = NULL;
1460e751525SEric Saxe 	dip = ctx;
1470e751525SEric Saxe 	instance = ddi_get_instance(dip);
1480e751525SEric Saxe 	cpudsp = ddi_get_soft_state(cpudrv_state, instance);
1490e751525SEric Saxe 	ASSERT(cpudsp != NULL);
1500e751525SEric Saxe 
1510e751525SEric Saxe 	mutex_enter(&cpudsp->lock);
1520e751525SEric Saxe 	cpupm = &(cpudsp->cpudrv_pm);
1530e751525SEric Saxe 	pm_level = PLAT_2_PM_LEVEL(cpupm, plat_level);
1540e751525SEric Saxe 	for (i = 0, spd = cpupm->head_spd; spd; i++, spd = spd->down_spd) {
1550e751525SEric Saxe 		/*
1560e751525SEric Saxe 		 * Don't mess with speeds that are higher than the new
1570e751525SEric Saxe 		 * top speed. They should be out of range anyway.
1580e751525SEric Saxe 		 */
1590e751525SEric Saxe 		if (spd->pm_level > pm_level)
1600e751525SEric Saxe 			continue;
1610e751525SEric Saxe 		/*
1620e751525SEric Saxe 		 * This is the new top speed.
1630e751525SEric Saxe 		 */
1640e751525SEric Saxe 		if (spd->pm_level == pm_level)
1650e751525SEric Saxe 			top_spd = spd;
1660e751525SEric Saxe 
1670e751525SEric Saxe 		spd->up_spd = top_spd;
1680e751525SEric Saxe 	}
1690e751525SEric Saxe 	cpupm->top_spd = top_spd;
1700e751525SEric Saxe 
1710e751525SEric Saxe 	cpupm->pm_governor_thread = curthread;
1720e751525SEric Saxe 
1730e751525SEric Saxe 	mutex_exit(&cpudsp->lock);
1740e751525SEric Saxe 
1750e751525SEric Saxe 	(void) pm_update_maxpower(dip, 0, top_spd->pm_level);
1760e751525SEric Saxe }
1770e751525SEric Saxe 
1780e751525SEric Saxe /*
1790e751525SEric Saxe  * This routine reads the ACPI _PPC object. It's accessed as a callback
1800e751525SEric Saxe  * by the ppm driver whenever a _PPC change notification is received.
1810e751525SEric Saxe  */
1820e751525SEric Saxe int
cpudrv_get_topspeed(void * ctx)1830e751525SEric Saxe cpudrv_get_topspeed(void *ctx)
1840e751525SEric Saxe {
1850e751525SEric Saxe 	cpu_t *cp;
1860e751525SEric Saxe 	cpudrv_devstate_t *cpudsp;
1870e751525SEric Saxe 	dev_info_t *dip;
1880e751525SEric Saxe 	int instance;
1890e751525SEric Saxe 	int plat_level;
1900e751525SEric Saxe 
1910e751525SEric Saxe 	dip = ctx;
1920e751525SEric Saxe 	instance = ddi_get_instance(dip);
1930e751525SEric Saxe 	cpudsp = ddi_get_soft_state(cpudrv_state, instance);
1940e751525SEric Saxe 	ASSERT(cpudsp != NULL);
1950e751525SEric Saxe 	cp = cpudsp->cp;
1960e751525SEric Saxe 	plat_level = cpupm_get_top_speed(cp);
1970e751525SEric Saxe 
1980e751525SEric Saxe 	return (plat_level);
1990e751525SEric Saxe }
2000e751525SEric Saxe 
2010e751525SEric Saxe 
2020e751525SEric Saxe /*
2030e751525SEric Saxe  * This notification handler is called whenever the ACPI _PPC
2040e751525SEric Saxe  * object changes. The _PPC is a sort of governor on power levels.
2050e751525SEric Saxe  * It sets an upper threshold on which, _PSS defined, power levels
2060e751525SEric Saxe  * are usuable. The _PPC value is dynamic and may change as properties
2070e751525SEric Saxe  * (i.e., thermal or AC source) of the system change.
2080e751525SEric Saxe  */
2090e751525SEric Saxe /* ARGSUSED */
2100e751525SEric Saxe static void
cpudrv_notify_handler(ACPI_HANDLE obj,UINT32 val,void * ctx)2110e751525SEric Saxe cpudrv_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
2120e751525SEric Saxe {
213d218c8f0SMark Haywood 	cpu_t			*cp;
214d218c8f0SMark Haywood 	cpupm_mach_state_t	*mach_state;
215d218c8f0SMark Haywood 	cpudrv_devstate_t	*cpudsp;
216d218c8f0SMark Haywood 	dev_info_t		*dip;
217d218c8f0SMark Haywood 	int			instance;
2180e751525SEric Saxe 	extern pm_cpupm_t	cpupm;
2190e751525SEric Saxe 
220d218c8f0SMark Haywood 	dip = ctx;
221d218c8f0SMark Haywood 	instance = ddi_get_instance(dip);
222d218c8f0SMark Haywood 	cpudsp = ddi_get_soft_state(cpudrv_state, instance);
223d218c8f0SMark Haywood 	if (cpudsp == NULL)
224d218c8f0SMark Haywood 		return;
225d218c8f0SMark Haywood 	cp = cpudsp->cp;
226d218c8f0SMark Haywood 	mach_state = (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
227d218c8f0SMark Haywood 	if (mach_state == NULL)
228d218c8f0SMark Haywood 		return;
229d218c8f0SMark Haywood 
2300e751525SEric Saxe 	/*
2310e751525SEric Saxe 	 * We only handle _PPC change notifications.
2320e751525SEric Saxe 	 */
233d218c8f0SMark Haywood 	if (!PM_EVENT_CPUPM && val == CPUPM_PPC_CHANGE_NOTIFICATION &&
234d218c8f0SMark Haywood 	    mach_state->ms_caps & CPUPM_P_STATES)
2350e751525SEric Saxe 		cpudrv_redefine_topspeed(ctx);
2360e751525SEric Saxe }
2370e751525SEric Saxe 
2380e751525SEric Saxe void
cpudrv_install_notify_handler(cpudrv_devstate_t * cpudsp)2390e751525SEric Saxe cpudrv_install_notify_handler(cpudrv_devstate_t *cpudsp)
2400e751525SEric Saxe {
2410e751525SEric Saxe 	cpu_t *cp = cpudsp->cp;
2420e751525SEric Saxe 	cpupm_add_notify_handler(cp, cpudrv_notify_handler,
2430e751525SEric Saxe 	    cpudsp->dip);
2440e751525SEric Saxe }
2450e751525SEric Saxe 
2460e751525SEric Saxe void
cpudrv_uninstall_notify_handler(cpudrv_devstate_t * cpudsp)247444f66e7SMark Haywood cpudrv_uninstall_notify_handler(cpudrv_devstate_t *cpudsp)
248444f66e7SMark Haywood {
249444f66e7SMark Haywood 	cpu_t *cp = cpudsp->cp;
250444f66e7SMark Haywood 	cpupm_notification_t *entry, **next;
251444f66e7SMark Haywood 
252444f66e7SMark Haywood 	ASSERT(cp != NULL);
253444f66e7SMark Haywood 	cpupm_mach_state_t *mach_state =
254444f66e7SMark Haywood 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
255444f66e7SMark Haywood 
256444f66e7SMark Haywood 	mutex_enter(&mach_state->ms_lock);
257444f66e7SMark Haywood 	if (mach_state->ms_handlers == NULL) {
258444f66e7SMark Haywood 		mutex_exit(&mach_state->ms_lock);
259444f66e7SMark Haywood 		return;
260444f66e7SMark Haywood 	}
261444f66e7SMark Haywood 
262444f66e7SMark Haywood 	for (next = &mach_state->ms_handlers; (entry = *next) != NULL; ) {
263444f66e7SMark Haywood 		if (entry->nq_handler != cpudrv_notify_handler) {
264444f66e7SMark Haywood 			next = &entry->nq_next;
265444f66e7SMark Haywood 			continue;
266444f66e7SMark Haywood 		}
267444f66e7SMark Haywood 		*next = entry->nq_next;
268444f66e7SMark Haywood 		kmem_free(entry, sizeof (cpupm_notification_t));
269444f66e7SMark Haywood 	}
270444f66e7SMark Haywood 	mutex_exit(&mach_state->ms_lock);
271444f66e7SMark Haywood }
272444f66e7SMark Haywood 
273444f66e7SMark Haywood void
cpudrv_redefine_topspeed(void * ctx)2740e751525SEric Saxe cpudrv_redefine_topspeed(void *ctx)
2750e751525SEric Saxe {
2760e751525SEric Saxe 	/*
2770e751525SEric Saxe 	 * This should never happen, unless ppm does not get loaded.
2780e751525SEric Saxe 	 */
2790e751525SEric Saxe 	if (cpupm_redefine_topspeed == NULL) {
2800e751525SEric Saxe 		cmn_err(CE_WARN, "cpudrv_redefine_topspeed: "
2810e751525SEric Saxe 		    "cpupm_redefine_topspeed has not been initialized - "
2820e751525SEric Saxe 		    "ignoring notification");
2830e751525SEric Saxe 		return;
2840e751525SEric Saxe 	}
2850e751525SEric Saxe 
2860e751525SEric Saxe 	/*
2870e751525SEric Saxe 	 * ppm callback needs to handle redefinition for all CPUs in
2880e751525SEric Saxe 	 * the domain.
2890e751525SEric Saxe 	 */
2900e751525SEric Saxe 	(*cpupm_redefine_topspeed)(ctx);
2910e751525SEric Saxe }
2920e751525SEric Saxe 
2930e751525SEric Saxe boolean_t
cpudrv_mach_init(cpudrv_devstate_t * cpudsp)2940e751525SEric Saxe cpudrv_mach_init(cpudrv_devstate_t *cpudsp)
2950e751525SEric Saxe {
2960e751525SEric Saxe 	cpupm_mach_state_t *mach_state;
297444f66e7SMark Haywood 	int topspeed;
2980e751525SEric Saxe 
299444f66e7SMark Haywood 	ASSERT(cpudsp->cp);
3000e751525SEric Saxe 
3010e751525SEric Saxe 	mach_state = (cpupm_mach_state_t *)
3020e751525SEric Saxe 	    (cpudsp->cp->cpu_m.mcpu_pm_mach_state);
3030e751525SEric Saxe 	mach_state->ms_dip = cpudsp->dip;
304444f66e7SMark Haywood 	/*
305444f66e7SMark Haywood 	 * allocate ppm CPU domain and initialize the topspeed
306444f66e7SMark Haywood 	 * only if P-states are enabled.
307444f66e7SMark Haywood 	 */
308444f66e7SMark Haywood 	if (cpudrv_power_ready(cpudsp->cp)) {
309444f66e7SMark Haywood 		(*cpupm_ppm_alloc_pstate_domains)(cpudsp->cp);
310444f66e7SMark Haywood 		topspeed = cpudrv_get_topspeed(cpudsp->dip);
311444f66e7SMark Haywood 		cpudrv_set_topspeed(cpudsp->dip, topspeed);
312444f66e7SMark Haywood 	}
313444f66e7SMark Haywood 
314444f66e7SMark Haywood 	return (B_TRUE);
315444f66e7SMark Haywood }
316444f66e7SMark Haywood 
317444f66e7SMark Haywood boolean_t
cpudrv_mach_fini(cpudrv_devstate_t * cpudsp)318444f66e7SMark Haywood cpudrv_mach_fini(cpudrv_devstate_t *cpudsp)
319444f66e7SMark Haywood {
320444f66e7SMark Haywood 	/*
321444f66e7SMark Haywood 	 * return TRUE if cpu pointer is NULL
322444f66e7SMark Haywood 	 */
323444f66e7SMark Haywood 	if (cpudsp->cp == NULL)
324444f66e7SMark Haywood 		return (B_TRUE);
325444f66e7SMark Haywood 	/*
326444f66e7SMark Haywood 	 * free ppm cpu pstate domains only if
327444f66e7SMark Haywood 	 * P-states are enabled
328444f66e7SMark Haywood 	 */
329444f66e7SMark Haywood 	if (cpudrv_power_ready(cpudsp->cp)) {
330444f66e7SMark Haywood 		(*cpupm_ppm_free_pstate_domains)(cpudsp->cp);
331444f66e7SMark Haywood 	}
332444f66e7SMark Haywood 
3330e751525SEric Saxe 	return (B_TRUE);
3340e751525SEric Saxe }
3350e751525SEric Saxe 
3360e751525SEric Saxe uint_t
cpudrv_get_speeds(cpudrv_devstate_t * cpudsp,int ** speeds)3370e751525SEric Saxe cpudrv_get_speeds(cpudrv_devstate_t *cpudsp, int **speeds)
3380e751525SEric Saxe {
3396af9d452Saubrey.li@intel.com 	/*
3406af9d452Saubrey.li@intel.com 	 * return nspeeds = 0 if can't get cpu_t
3416af9d452Saubrey.li@intel.com 	 */
3426af9d452Saubrey.li@intel.com 	if (cpudrv_get_cpu(cpudsp) != DDI_SUCCESS)
3436af9d452Saubrey.li@intel.com 		return (0);
3446af9d452Saubrey.li@intel.com 
3450e751525SEric Saxe 	return (cpupm_get_speeds(cpudsp->cp, speeds));
3460e751525SEric Saxe }
3470e751525SEric Saxe 
3480e751525SEric Saxe void
cpudrv_free_speeds(int * speeds,uint_t nspeeds)3490e751525SEric Saxe cpudrv_free_speeds(int *speeds, uint_t nspeeds)
3500e751525SEric Saxe {
3510e751525SEric Saxe 	cpupm_free_speeds(speeds, nspeeds);
3520e751525SEric Saxe }
3530e751525SEric Saxe 
3540e751525SEric Saxe boolean_t
cpudrv_power_ready(cpu_t * cp)355444f66e7SMark Haywood cpudrv_power_ready(cpu_t *cp)
3560e751525SEric Saxe {
357444f66e7SMark Haywood 	return (cpupm_power_ready(cp));
3580e751525SEric Saxe }
3590e751525SEric Saxe 
3600e751525SEric Saxe /* ARGSUSED */
3610e751525SEric Saxe void
cpudrv_set_supp_freqs(cpudrv_devstate_t * cpudsp)3620e751525SEric Saxe cpudrv_set_supp_freqs(cpudrv_devstate_t *cpudsp)
3630e751525SEric Saxe {
3640e751525SEric Saxe }
365