xref: /illumos-gate/usr/src/uts/i86pc/os/cpupm/cpupm_throttle.c (revision 027bcc9f64a0a5915089267b0dc54c9ee05782b0)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250e751525SEric Saxe 
260e751525SEric Saxe #include <sys/x86_archext.h>
270e751525SEric Saxe #include <sys/machsystm.h>
280e751525SEric Saxe #include <sys/x_call.h>
290e751525SEric Saxe #include <sys/cpu_acpi.h>
300e751525SEric Saxe #include <sys/cpupm_throttle.h>
310e751525SEric Saxe #include <sys/dtrace.h>
320e751525SEric Saxe #include <sys/sdt.h>
330e751525SEric Saxe 
340e751525SEric Saxe static int cpupm_throttle_init(cpu_t *);
350e751525SEric Saxe static void cpupm_throttle_fini(cpu_t *);
360e751525SEric Saxe static void cpupm_throttle(cpuset_t,  uint32_t);
37444f66e7SMark Haywood static void cpupm_throttle_stop(cpu_t *);
380e751525SEric Saxe 
390e751525SEric Saxe cpupm_state_ops_t cpupm_throttle_ops = {
400e751525SEric Saxe 	"Generic ACPI T-state Support",
410e751525SEric Saxe 	cpupm_throttle_init,
420e751525SEric Saxe 	cpupm_throttle_fini,
43444f66e7SMark Haywood 	cpupm_throttle,
44444f66e7SMark Haywood 	cpupm_throttle_stop
450e751525SEric Saxe };
460e751525SEric Saxe 
/*
 * Return codes used by the throttling support code.
 */
#define	THROTTLE_RET_SUCCESS		0x00
#define	THROTTLE_RET_INCOMPLETE_DATA	0x01
#define	THROTTLE_RET_UNSUP_STATE	0x02
#define	THROTTLE_RET_TRANS_INCOMPLETE	0x03

/*
 * Delay handed to drv_usecwait() between status polls while waiting for
 * a T-state transition; also the amount the poll budget is reduced by on
 * each iteration.
 */
#define	THROTTLE_LATENCY_WAIT		1
560e751525SEric Saxe 
/*
 * Address of the clock modulation MSR that is read and written when the
 * _PTC registers live in fixed hardware.
 */
#define	IA32_CLOCK_MODULATION_MSR	0x19A
610e751525SEric Saxe 
/*
 * Debugging support
 *
 * CTDEBUG takes a fully parenthesized printf argument list, e.g.
 * CTDEBUG(("value = %d\n", v)); and prints it only on DEBUG builds when
 * cpupm_throttle_debug is non-zero.
 *
 * The expansion is a single statement without a trailing semicolon so
 * that it is safe in an unbraced if/else: it can neither capture a
 * following "else" nor leave a stray empty statement behind.
 */
#ifdef  DEBUG
volatile int cpupm_throttle_debug = 0;
#define	CTDEBUG(arglist)				\
	do {						\
		if (cpupm_throttle_debug)		\
			printf arglist;			\
	} while (0)
#else
#define	CTDEBUG(arglist)	((void)0)
#endif
710e751525SEric Saxe 
720e751525SEric Saxe /*
730e751525SEric Saxe  * Write the _PTC ctrl register. How it is written, depends upon the _PTC
740e751525SEric Saxe  * APCI object value.
750e751525SEric Saxe  */
760e751525SEric Saxe static int
write_ctrl(cpu_acpi_handle_t handle,uint32_t ctrl)770e751525SEric Saxe write_ctrl(cpu_acpi_handle_t handle, uint32_t ctrl)
780e751525SEric Saxe {
790e751525SEric Saxe 	cpu_acpi_ptc_t *ptc_ctrl;
800e751525SEric Saxe 	uint64_t reg;
810e751525SEric Saxe 	int ret = 0;
820e751525SEric Saxe 
830e751525SEric Saxe 	ptc_ctrl = CPU_ACPI_PTC_CTRL(handle);
840e751525SEric Saxe 
850e751525SEric Saxe 	switch (ptc_ctrl->cr_addrspace_id) {
860e751525SEric Saxe 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
870e751525SEric Saxe 		/*
880e751525SEric Saxe 		 * Read current thermal state because reserved bits must be
890e751525SEric Saxe 		 * preserved, compose new value, and write it.The writable
900e751525SEric Saxe 		 * bits are 4:1 (1 to 4).
910e751525SEric Saxe 		 * Bits 3:1 => On-Demand Clock Modulation Duty Cycle
920e751525SEric Saxe 		 * Bit  4   => On-Demand Clock Modulation Enable
930e751525SEric Saxe 		 * Left shift ctrl by 1 to allign with bits 1-4 of MSR
940e751525SEric Saxe 		 */
950e751525SEric Saxe 		reg = rdmsr(IA32_CLOCK_MODULATION_MSR);
960e751525SEric Saxe 		reg &= ~((uint64_t)0x1E);
970e751525SEric Saxe 		reg |= ctrl;
980e751525SEric Saxe 		wrmsr(IA32_CLOCK_MODULATION_MSR, reg);
990e751525SEric Saxe 		break;
1000e751525SEric Saxe 
1010e751525SEric Saxe 	case ACPI_ADR_SPACE_SYSTEM_IO:
1020e751525SEric Saxe 		ret = cpu_acpi_write_port(ptc_ctrl->cr_address, ctrl,
1030e751525SEric Saxe 		    ptc_ctrl->cr_width);
1040e751525SEric Saxe 		break;
1050e751525SEric Saxe 
1060e751525SEric Saxe 	default:
1070e751525SEric Saxe 		DTRACE_PROBE1(throttle_ctrl_unsupported_type, uint8_t,
1080e751525SEric Saxe 		    ptc_ctrl->cr_addrspace_id);
1090e751525SEric Saxe 
1100e751525SEric Saxe 		ret = -1;
1110e751525SEric Saxe 	}
1120e751525SEric Saxe 
1130e751525SEric Saxe 	DTRACE_PROBE1(throttle_ctrl_write, uint32_t, ctrl);
1140e751525SEric Saxe 	DTRACE_PROBE1(throttle_ctrl_write_err, int, ret);
1150e751525SEric Saxe 
1160e751525SEric Saxe 	return (ret);
1170e751525SEric Saxe }
1180e751525SEric Saxe 
1190e751525SEric Saxe static int
read_status(cpu_acpi_handle_t handle,uint32_t * stat)1200e751525SEric Saxe read_status(cpu_acpi_handle_t handle, uint32_t *stat)
1210e751525SEric Saxe {
1220e751525SEric Saxe 	cpu_acpi_ptc_t *ptc_stat;
1230e751525SEric Saxe 	uint64_t reg;
1240e751525SEric Saxe 	int ret = 0;
1250e751525SEric Saxe 
1260e751525SEric Saxe 	ptc_stat = CPU_ACPI_PTC_STATUS(handle);
1270e751525SEric Saxe 
1280e751525SEric Saxe 	switch (ptc_stat->cr_addrspace_id) {
1290e751525SEric Saxe 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
1300e751525SEric Saxe 		reg = rdmsr(IA32_CLOCK_MODULATION_MSR);
1310e751525SEric Saxe 		*stat = reg & 0x1E;
1320e751525SEric Saxe 		ret = 0;
1330e751525SEric Saxe 		break;
1340e751525SEric Saxe 
1350e751525SEric Saxe 	case ACPI_ADR_SPACE_SYSTEM_IO:
1360e751525SEric Saxe 		ret = cpu_acpi_read_port(ptc_stat->cr_address, stat,
1370e751525SEric Saxe 		    ptc_stat->cr_width);
1380e751525SEric Saxe 		break;
1390e751525SEric Saxe 
1400e751525SEric Saxe 	default:
1410e751525SEric Saxe 		DTRACE_PROBE1(throttle_status_unsupported_type, uint8_t,
1420e751525SEric Saxe 		    ptc_stat->cr_addrspace_id);
1430e751525SEric Saxe 
1440e751525SEric Saxe 		return (-1);
1450e751525SEric Saxe 	}
1460e751525SEric Saxe 
1470e751525SEric Saxe 	DTRACE_PROBE1(throttle_status_read, uint32_t, *stat);
1480e751525SEric Saxe 	DTRACE_PROBE1(throttle_status_read_err, int, ret);
1490e751525SEric Saxe 
1500e751525SEric Saxe 	return (ret);
1510e751525SEric Saxe }
1520e751525SEric Saxe 
1530e751525SEric Saxe /*
1540e751525SEric Saxe  * Transition the current processor to the requested throttling state.
1550e751525SEric Saxe  */
156*027bcc9fSToomas Soome static int
cpupm_tstate_transition(xc_arg_t arg1,xc_arg_t arg2 __unused,xc_arg_t arg3 __unused)157*027bcc9fSToomas Soome cpupm_tstate_transition(xc_arg_t arg1, xc_arg_t arg2 __unused,
158*027bcc9fSToomas Soome     xc_arg_t arg3 __unused)
1590e751525SEric Saxe {
160*027bcc9fSToomas Soome 	uint32_t req_state = arg1;
1610e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
1620e751525SEric Saxe 	    (cpupm_mach_state_t *)CPU->cpu_m.mcpu_pm_mach_state;
1630e751525SEric Saxe 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
1640e751525SEric Saxe 	cpu_acpi_tstate_t *req_tstate;
1650e751525SEric Saxe 	uint32_t ctrl;
1660e751525SEric Saxe 	uint32_t stat;
1670e751525SEric Saxe 	int i;
1680e751525SEric Saxe 
1690e751525SEric Saxe 	req_tstate = (cpu_acpi_tstate_t *)CPU_ACPI_TSTATES(handle);
1700e751525SEric Saxe 	req_tstate += req_state;
1710e751525SEric Saxe 	DTRACE_PROBE1(throttle_transition, uint32_t,
1720e751525SEric Saxe 	    CPU_ACPI_FREQPER(req_tstate));
1730e751525SEric Saxe 
1740e751525SEric Saxe 	/*
1750e751525SEric Saxe 	 * Initiate the processor t-state change.
1760e751525SEric Saxe 	 */
1770e751525SEric Saxe 	ctrl = CPU_ACPI_TSTATE_CTRL(req_tstate);
1780e751525SEric Saxe 	if (write_ctrl(handle, ctrl) != 0) {
179*027bcc9fSToomas Soome 		return (0);
1800e751525SEric Saxe 	}
1810e751525SEric Saxe 
1820e751525SEric Saxe 	/*
1830e751525SEric Saxe 	 * If status is zero, then transition is synchronous and
1840e751525SEric Saxe 	 * no status value comparison is required.
1850e751525SEric Saxe 	 */
1860e751525SEric Saxe 	if (CPU_ACPI_TSTATE_STAT(req_tstate) == 0) {
187*027bcc9fSToomas Soome 		return (0);
1880e751525SEric Saxe 	}
1890e751525SEric Saxe 
1900e751525SEric Saxe 	/* Wait until switch is complete, but bound the loop just in case. */
1910e751525SEric Saxe 	for (i = CPU_ACPI_TSTATE_TRANSLAT(req_tstate) * 2; i >= 0;
1920e751525SEric Saxe 	    i -= THROTTLE_LATENCY_WAIT) {
1930e751525SEric Saxe 		if (read_status(handle, &stat) == 0 &&
1940e751525SEric Saxe 		    CPU_ACPI_TSTATE_STAT(req_tstate) == stat)
1950e751525SEric Saxe 			break;
1960e751525SEric Saxe 		drv_usecwait(THROTTLE_LATENCY_WAIT);
1970e751525SEric Saxe 	}
1980e751525SEric Saxe 
1990e751525SEric Saxe 	if (CPU_ACPI_TSTATE_STAT(req_tstate) != stat) {
2000e751525SEric Saxe 		DTRACE_PROBE(throttle_transition_incomplete);
2010e751525SEric Saxe 	}
202*027bcc9fSToomas Soome 	return (0);
2030e751525SEric Saxe }
2040e751525SEric Saxe 
2050e751525SEric Saxe static void
cpupm_throttle(cpuset_t set,uint32_t throtl_lvl)2060e751525SEric Saxe cpupm_throttle(cpuset_t set,  uint32_t throtl_lvl)
2070e751525SEric Saxe {
208*027bcc9fSToomas Soome 	xc_arg_t xc_arg = (xc_arg_t)throtl_lvl;
209*027bcc9fSToomas Soome 
2100e751525SEric Saxe 	/*
2110e751525SEric Saxe 	 * If thread is already running on target CPU then just
2120e751525SEric Saxe 	 * make the transition request. Otherwise, we'll need to
2130e751525SEric Saxe 	 * make a cross-call.
2140e751525SEric Saxe 	 */
2150e751525SEric Saxe 	kpreempt_disable();
2160e751525SEric Saxe 	if (CPU_IN_SET(set, CPU->cpu_id)) {
217*027bcc9fSToomas Soome 		cpupm_tstate_transition(xc_arg, 0, 0);
2180e751525SEric Saxe 		CPUSET_DEL(set, CPU->cpu_id);
2190e751525SEric Saxe 	}
2200e751525SEric Saxe 	if (!CPUSET_ISNULL(set)) {
221*027bcc9fSToomas Soome 		xc_call(xc_arg, 0, 0,
222*027bcc9fSToomas Soome 		    CPUSET2BV(set), cpupm_tstate_transition);
2230e751525SEric Saxe 	}
2240e751525SEric Saxe 	kpreempt_enable();
2250e751525SEric Saxe }
2260e751525SEric Saxe 
2270e751525SEric Saxe static int
cpupm_throttle_init(cpu_t * cp)2280e751525SEric Saxe cpupm_throttle_init(cpu_t *cp)
2290e751525SEric Saxe {
2300e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
2310e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
2320e751525SEric Saxe 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
2330e751525SEric Saxe 	cpu_acpi_ptc_t *ptc_stat;
23400f97612SMark Haywood 	int ret;
2350e751525SEric Saxe 
23600f97612SMark Haywood 	if ((ret = cpu_acpi_cache_tstate_data(handle)) != 0) {
23700f97612SMark Haywood 		if (ret < 0)
23800f97612SMark Haywood 			cmn_err(CE_NOTE,
23900f97612SMark Haywood 			    "!Support for CPU throttling is being "
24000f97612SMark Haywood 			    "disabled due to errors parsing ACPI T-state "
24100f97612SMark Haywood 			    "objects exported by BIOS.");
2420e751525SEric Saxe 		cpupm_throttle_fini(cp);
2430e751525SEric Saxe 		return (THROTTLE_RET_INCOMPLETE_DATA);
2440e751525SEric Saxe 	}
2450e751525SEric Saxe 
2460e751525SEric Saxe 	/*
2470e751525SEric Saxe 	 * Check the address space used for transitions
2480e751525SEric Saxe 	 */
2490e751525SEric Saxe 	ptc_stat = CPU_ACPI_PTC_STATUS(handle);
2500e751525SEric Saxe 	switch (ptc_stat->cr_addrspace_id) {
2510e751525SEric Saxe 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
2520e751525SEric Saxe 		CTDEBUG(("T-State transitions will use fixed hardware\n"));
2530e751525SEric Saxe 		break;
2540e751525SEric Saxe 	case ACPI_ADR_SPACE_SYSTEM_IO:
2550e751525SEric Saxe 		CTDEBUG(("T-State transitions will use System IO\n"));
2560e751525SEric Saxe 		break;
2570e751525SEric Saxe 	default:
25800f97612SMark Haywood 		cmn_err(CE_NOTE, "!_PTC configured for unsupported "
2590e751525SEric Saxe 		    "address space type = %d.", ptc_stat->cr_addrspace_id);
2600e751525SEric Saxe 		return (THROTTLE_RET_INCOMPLETE_DATA);
2610e751525SEric Saxe 	}
2620e751525SEric Saxe 
2630e751525SEric Saxe 	cpupm_alloc_domains(cp, CPUPM_T_STATES);
2640e751525SEric Saxe 
2650e751525SEric Saxe 	return (THROTTLE_RET_SUCCESS);
2660e751525SEric Saxe }
2670e751525SEric Saxe 
2680e751525SEric Saxe static void
cpupm_throttle_fini(cpu_t * cp)2690e751525SEric Saxe cpupm_throttle_fini(cpu_t *cp)
2700e751525SEric Saxe {
2710e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
2720e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
2730e751525SEric Saxe 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
2740e751525SEric Saxe 
2750e751525SEric Saxe 	cpupm_free_domains(&cpupm_tstate_domains);
2760e751525SEric Saxe 	cpu_acpi_free_tstate_data(handle);
2770e751525SEric Saxe }
2780e751525SEric Saxe 
279444f66e7SMark Haywood static void
cpupm_throttle_stop(cpu_t * cp)280444f66e7SMark Haywood cpupm_throttle_stop(cpu_t *cp)
281444f66e7SMark Haywood {
282444f66e7SMark Haywood 	cpupm_mach_state_t *mach_state =
283444f66e7SMark Haywood 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
284444f66e7SMark Haywood 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
285444f66e7SMark Haywood 
286444f66e7SMark Haywood 	cpupm_remove_domains(cp, CPUPM_T_STATES, &cpupm_tstate_domains);
287444f66e7SMark Haywood 	cpu_acpi_free_tstate_data(handle);
288444f66e7SMark Haywood }
289444f66e7SMark Haywood 
2900e751525SEric Saxe /*
2910e751525SEric Saxe  * This routine reads the ACPI _TPC object. It's accessed as a callback
2920e751525SEric Saxe  * by the cpu driver whenever a _TPC change notification is received.
2930e751525SEric Saxe  */
2940e751525SEric Saxe static int
cpupm_throttle_get_max(processorid_t cpu_id)2950e751525SEric Saxe cpupm_throttle_get_max(processorid_t cpu_id)
2960e751525SEric Saxe {
2970e751525SEric Saxe 	cpu_t			*cp = cpu[cpu_id];
2980e751525SEric Saxe 	cpupm_mach_state_t	*mach_state =
2990e751525SEric Saxe 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
3000e751525SEric Saxe 	cpu_acpi_handle_t	handle;
3010e751525SEric Saxe 	int			throtl_level;
3020e751525SEric Saxe 	int			max_throttle_lvl;
3030e751525SEric Saxe 	uint_t			num_throtl;
3040e751525SEric Saxe 
3050e751525SEric Saxe 	if (mach_state == NULL) {
3060e751525SEric Saxe 		return (-1);
3070e751525SEric Saxe 	}
3080e751525SEric Saxe 
3090e751525SEric Saxe 	handle = mach_state->ms_acpi_handle;
3100e751525SEric Saxe 	ASSERT(handle != NULL);
3110e751525SEric Saxe 
3120e751525SEric Saxe 	cpu_acpi_cache_tpc(handle);
3130e751525SEric Saxe 	throtl_level = CPU_ACPI_TPC(handle);
3140e751525SEric Saxe 
3150e751525SEric Saxe 	num_throtl = CPU_ACPI_TSTATES_COUNT(handle);
3160e751525SEric Saxe 
3170e751525SEric Saxe 	max_throttle_lvl = num_throtl - 1;
3180e751525SEric Saxe 	if ((throtl_level < 0) || (throtl_level > max_throttle_lvl)) {
3190e751525SEric Saxe 		cmn_err(CE_NOTE, "!cpupm_throttle_get_max: CPU %d: "
3200e751525SEric Saxe 		    "_TPC out of range %d", cp->cpu_id, throtl_level);
3210e751525SEric Saxe 		throtl_level = 0;
3220e751525SEric Saxe 	}
3230e751525SEric Saxe 
3240e751525SEric Saxe 	return (throtl_level);
3250e751525SEric Saxe }
3260e751525SEric Saxe 
3270e751525SEric Saxe /*
3280e751525SEric Saxe  * Take care of CPU throttling when _TPC notification arrives
3290e751525SEric Saxe  */
3300e751525SEric Saxe void
cpupm_throttle_manage_notification(void * ctx)3310e751525SEric Saxe cpupm_throttle_manage_notification(void *ctx)
3320e751525SEric Saxe {
3330e751525SEric Saxe 	cpu_t			*cp = ctx;
3340e751525SEric Saxe 	processorid_t		cpu_id = cp->cpu_id;
3350e751525SEric Saxe 	cpupm_mach_state_t	*mach_state =
3360e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
3370e751525SEric Saxe 	boolean_t		is_ready;
3380e751525SEric Saxe 	int			new_level;
3390e751525SEric Saxe 
3400e751525SEric Saxe 	if (mach_state == NULL) {
3410e751525SEric Saxe 		return;
3420e751525SEric Saxe 	}
3430e751525SEric Saxe 
3440e751525SEric Saxe 	/*
3450e751525SEric Saxe 	 * We currently refuse to power-manage if the CPU is not ready to
3460e751525SEric Saxe 	 * take cross calls (cross calls fail silently if CPU is not ready
3470e751525SEric Saxe 	 * for it).
3480e751525SEric Saxe 	 *
349444f66e7SMark Haywood 	 * Additionally, for x86 platforms we cannot power-manage an instance,
350444f66e7SMark Haywood 	 * until it has been initialized.
3510e751525SEric Saxe 	 */
352444f66e7SMark Haywood 	is_ready = (cp->cpu_flags & CPU_READY) && cpupm_throttle_ready(cp);
3530e751525SEric Saxe 	if (!is_ready)
3540e751525SEric Saxe 		return;
3550e751525SEric Saxe 
3560e751525SEric Saxe 	if (!(mach_state->ms_caps & CPUPM_T_STATES))
3570e751525SEric Saxe 		return;
3580e751525SEric Saxe 	ASSERT(mach_state->ms_tstate.cma_ops != NULL);
3590e751525SEric Saxe 
3600e751525SEric Saxe 	/*
3610e751525SEric Saxe 	 * Get the new T-State support level
3620e751525SEric Saxe 	 */
3630e751525SEric Saxe 	new_level = cpupm_throttle_get_max(cpu_id);
3640e751525SEric Saxe 
3650e751525SEric Saxe 	cpupm_state_change(cp, new_level, CPUPM_T_STATES);
3660e751525SEric Saxe }
367