10e751525SEric Saxe /* 20e751525SEric Saxe * CDDL HEADER START 30e751525SEric Saxe * 40e751525SEric Saxe * The contents of this file are subject to the terms of the 50e751525SEric Saxe * Common Development and Distribution License (the "License"). 60e751525SEric Saxe * You may not use this file except in compliance with the License. 70e751525SEric Saxe * 80e751525SEric Saxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90e751525SEric Saxe * or http://www.opensolaris.org/os/licensing. 100e751525SEric Saxe * See the License for the specific language governing permissions 110e751525SEric Saxe * and limitations under the License. 120e751525SEric Saxe * 130e751525SEric Saxe * When distributing Covered Code, include this CDDL HEADER in each 140e751525SEric Saxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150e751525SEric Saxe * If applicable, add the following below this CDDL HEADER, with the 160e751525SEric Saxe * fields enclosed by brackets "[]" replaced with your own identifying 170e751525SEric Saxe * information: Portions Copyright [yyyy] [name of copyright owner] 180e751525SEric Saxe * 190e751525SEric Saxe * CDDL HEADER END 200e751525SEric Saxe */ 210e751525SEric Saxe /* 220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 230e751525SEric Saxe * Use is subject to license terms. 240e751525SEric Saxe */ 250e751525SEric Saxe 260e751525SEric Saxe #include <sys/x86_archext.h> 270e751525SEric Saxe #include <sys/machsystm.h> 280e751525SEric Saxe #include <sys/x_call.h> 290e751525SEric Saxe #include <sys/cpu_acpi.h> 300e751525SEric Saxe #include <sys/cpupm_throttle.h> 310e751525SEric Saxe #include <sys/dtrace.h> 320e751525SEric Saxe #include <sys/sdt.h> 330e751525SEric Saxe 340e751525SEric Saxe static int cpupm_throttle_init(cpu_t *); 350e751525SEric Saxe static void cpupm_throttle_fini(cpu_t *); 360e751525SEric Saxe static void cpupm_throttle(cpuset_t, uint32_t); 37*444f66e7SMark Haywood static void cpupm_throttle_stop(cpu_t *); 380e751525SEric Saxe 390e751525SEric Saxe cpupm_state_ops_t cpupm_throttle_ops = { 400e751525SEric Saxe "Generic ACPI T-state Support", 410e751525SEric Saxe cpupm_throttle_init, 420e751525SEric Saxe cpupm_throttle_fini, 43*444f66e7SMark Haywood cpupm_throttle, 44*444f66e7SMark Haywood cpupm_throttle_stop 450e751525SEric Saxe }; 460e751525SEric Saxe 470e751525SEric Saxe /* 480e751525SEric Saxe * Error returns 490e751525SEric Saxe */ 500e751525SEric Saxe #define THROTTLE_RET_SUCCESS 0x00 510e751525SEric Saxe #define THROTTLE_RET_INCOMPLETE_DATA 0x01 520e751525SEric Saxe #define THROTTLE_RET_UNSUP_STATE 0x02 530e751525SEric Saxe #define THROTTLE_RET_TRANS_INCOMPLETE 0x03 540e751525SEric Saxe 550e751525SEric Saxe #define THROTTLE_LATENCY_WAIT 1 560e751525SEric Saxe 570e751525SEric Saxe /* 580e751525SEric Saxe * MSR register for clock modulation 590e751525SEric Saxe */ 600e751525SEric Saxe #define IA32_CLOCK_MODULATION_MSR 0x19A 610e751525SEric Saxe 620e751525SEric Saxe /* 630e751525SEric Saxe * Debugging support 640e751525SEric Saxe */ 650e751525SEric Saxe #ifdef DEBUG 660e751525SEric Saxe volatile int cpupm_throttle_debug = 0; 670e751525SEric Saxe #define CTDEBUG(arglist) if (cpupm_throttle_debug) printf arglist; 680e751525SEric Saxe #else 690e751525SEric Saxe #define CTDEBUG(arglist) 700e751525SEric Saxe #endif 710e751525SEric Saxe 720e751525SEric Saxe /* 730e751525SEric Saxe * Write the _PTC ctrl register. How it is written, depends upon the _PTC 740e751525SEric Saxe * APCI object value. 750e751525SEric Saxe */ 760e751525SEric Saxe static int 770e751525SEric Saxe write_ctrl(cpu_acpi_handle_t handle, uint32_t ctrl) 780e751525SEric Saxe { 790e751525SEric Saxe cpu_acpi_ptc_t *ptc_ctrl; 800e751525SEric Saxe uint64_t reg; 810e751525SEric Saxe int ret = 0; 820e751525SEric Saxe 830e751525SEric Saxe ptc_ctrl = CPU_ACPI_PTC_CTRL(handle); 840e751525SEric Saxe 850e751525SEric Saxe switch (ptc_ctrl->cr_addrspace_id) { 860e751525SEric Saxe case ACPI_ADR_SPACE_FIXED_HARDWARE: 870e751525SEric Saxe /* 880e751525SEric Saxe * Read current thermal state because reserved bits must be 890e751525SEric Saxe * preserved, compose new value, and write it.The writable 900e751525SEric Saxe * bits are 4:1 (1 to 4). 910e751525SEric Saxe * Bits 3:1 => On-Demand Clock Modulation Duty Cycle 920e751525SEric Saxe * Bit 4 => On-Demand Clock Modulation Enable 930e751525SEric Saxe * Left shift ctrl by 1 to allign with bits 1-4 of MSR 940e751525SEric Saxe */ 950e751525SEric Saxe reg = rdmsr(IA32_CLOCK_MODULATION_MSR); 960e751525SEric Saxe reg &= ~((uint64_t)0x1E); 970e751525SEric Saxe reg |= ctrl; 980e751525SEric Saxe wrmsr(IA32_CLOCK_MODULATION_MSR, reg); 990e751525SEric Saxe break; 1000e751525SEric Saxe 1010e751525SEric Saxe case ACPI_ADR_SPACE_SYSTEM_IO: 1020e751525SEric Saxe ret = cpu_acpi_write_port(ptc_ctrl->cr_address, ctrl, 1030e751525SEric Saxe ptc_ctrl->cr_width); 1040e751525SEric Saxe break; 1050e751525SEric Saxe 1060e751525SEric Saxe default: 1070e751525SEric Saxe DTRACE_PROBE1(throttle_ctrl_unsupported_type, uint8_t, 1080e751525SEric Saxe ptc_ctrl->cr_addrspace_id); 1090e751525SEric Saxe 1100e751525SEric Saxe ret = -1; 1110e751525SEric Saxe } 1120e751525SEric Saxe 1130e751525SEric Saxe DTRACE_PROBE1(throttle_ctrl_write, uint32_t, ctrl); 1140e751525SEric Saxe DTRACE_PROBE1(throttle_ctrl_write_err, int, ret); 1150e751525SEric Saxe 1160e751525SEric Saxe return (ret); 1170e751525SEric Saxe } 1180e751525SEric Saxe 1190e751525SEric Saxe static int 1200e751525SEric Saxe read_status(cpu_acpi_handle_t handle, uint32_t *stat) 1210e751525SEric Saxe { 1220e751525SEric Saxe cpu_acpi_ptc_t *ptc_stat; 1230e751525SEric Saxe uint64_t reg; 1240e751525SEric Saxe int ret = 0; 1250e751525SEric Saxe 1260e751525SEric Saxe ptc_stat = CPU_ACPI_PTC_STATUS(handle); 1270e751525SEric Saxe 1280e751525SEric Saxe switch (ptc_stat->cr_addrspace_id) { 1290e751525SEric Saxe case ACPI_ADR_SPACE_FIXED_HARDWARE: 1300e751525SEric Saxe reg = rdmsr(IA32_CLOCK_MODULATION_MSR); 1310e751525SEric Saxe *stat = reg & 0x1E; 1320e751525SEric Saxe ret = 0; 1330e751525SEric Saxe break; 1340e751525SEric Saxe 1350e751525SEric Saxe case ACPI_ADR_SPACE_SYSTEM_IO: 1360e751525SEric Saxe ret = cpu_acpi_read_port(ptc_stat->cr_address, stat, 1370e751525SEric Saxe ptc_stat->cr_width); 1380e751525SEric Saxe break; 1390e751525SEric Saxe 1400e751525SEric Saxe default: 1410e751525SEric Saxe DTRACE_PROBE1(throttle_status_unsupported_type, uint8_t, 1420e751525SEric Saxe ptc_stat->cr_addrspace_id); 1430e751525SEric Saxe 1440e751525SEric Saxe return (-1); 1450e751525SEric Saxe } 1460e751525SEric Saxe 1470e751525SEric Saxe DTRACE_PROBE1(throttle_status_read, uint32_t, *stat); 1480e751525SEric Saxe DTRACE_PROBE1(throttle_status_read_err, int, ret); 1490e751525SEric Saxe 1500e751525SEric Saxe return (ret); 1510e751525SEric Saxe } 1520e751525SEric Saxe 1530e751525SEric Saxe /* 1540e751525SEric Saxe * Transition the current processor to the requested throttling state. 1550e751525SEric Saxe */ 1560e751525SEric Saxe static void 1570e751525SEric Saxe cpupm_tstate_transition(uint32_t req_state) 1580e751525SEric Saxe { 1590e751525SEric Saxe cpupm_mach_state_t *mach_state = 1600e751525SEric Saxe (cpupm_mach_state_t *)CPU->cpu_m.mcpu_pm_mach_state; 1610e751525SEric Saxe cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 1620e751525SEric Saxe cpu_acpi_tstate_t *req_tstate; 1630e751525SEric Saxe uint32_t ctrl; 1640e751525SEric Saxe uint32_t stat; 1650e751525SEric Saxe int i; 1660e751525SEric Saxe 1670e751525SEric Saxe req_tstate = (cpu_acpi_tstate_t *)CPU_ACPI_TSTATES(handle); 1680e751525SEric Saxe req_tstate += req_state; 1690e751525SEric Saxe DTRACE_PROBE1(throttle_transition, uint32_t, 1700e751525SEric Saxe CPU_ACPI_FREQPER(req_tstate)); 1710e751525SEric Saxe 1720e751525SEric Saxe /* 1730e751525SEric Saxe * Initiate the processor t-state change. 1740e751525SEric Saxe */ 1750e751525SEric Saxe ctrl = CPU_ACPI_TSTATE_CTRL(req_tstate); 1760e751525SEric Saxe if (write_ctrl(handle, ctrl) != 0) { 1770e751525SEric Saxe return; 1780e751525SEric Saxe } 1790e751525SEric Saxe 1800e751525SEric Saxe /* 1810e751525SEric Saxe * If status is zero, then transition is synchronous and 1820e751525SEric Saxe * no status value comparison is required. 1830e751525SEric Saxe */ 1840e751525SEric Saxe if (CPU_ACPI_TSTATE_STAT(req_tstate) == 0) { 1850e751525SEric Saxe return; 1860e751525SEric Saxe } 1870e751525SEric Saxe 1880e751525SEric Saxe /* Wait until switch is complete, but bound the loop just in case. */ 1890e751525SEric Saxe for (i = CPU_ACPI_TSTATE_TRANSLAT(req_tstate) * 2; i >= 0; 1900e751525SEric Saxe i -= THROTTLE_LATENCY_WAIT) { 1910e751525SEric Saxe if (read_status(handle, &stat) == 0 && 1920e751525SEric Saxe CPU_ACPI_TSTATE_STAT(req_tstate) == stat) 1930e751525SEric Saxe break; 1940e751525SEric Saxe drv_usecwait(THROTTLE_LATENCY_WAIT); 1950e751525SEric Saxe } 1960e751525SEric Saxe 1970e751525SEric Saxe if (CPU_ACPI_TSTATE_STAT(req_tstate) != stat) { 1980e751525SEric Saxe DTRACE_PROBE(throttle_transition_incomplete); 1990e751525SEric Saxe } 2000e751525SEric Saxe } 2010e751525SEric Saxe 2020e751525SEric Saxe static void 2030e751525SEric Saxe cpupm_throttle(cpuset_t set, uint32_t throtl_lvl) 2040e751525SEric Saxe { 2050e751525SEric Saxe /* 2060e751525SEric Saxe * If thread is already running on target CPU then just 2070e751525SEric Saxe * make the transition request. Otherwise, we'll need to 2080e751525SEric Saxe * make a cross-call. 2090e751525SEric Saxe */ 2100e751525SEric Saxe kpreempt_disable(); 2110e751525SEric Saxe if (CPU_IN_SET(set, CPU->cpu_id)) { 2120e751525SEric Saxe cpupm_tstate_transition(throtl_lvl); 2130e751525SEric Saxe CPUSET_DEL(set, CPU->cpu_id); 2140e751525SEric Saxe } 2150e751525SEric Saxe if (!CPUSET_ISNULL(set)) { 216f34a7178SJoe Bonasera xc_call((xc_arg_t)throtl_lvl, NULL, NULL, 217f34a7178SJoe Bonasera CPUSET2BV(set), (xc_func_t)cpupm_tstate_transition); 2180e751525SEric Saxe } 2190e751525SEric Saxe kpreempt_enable(); 2200e751525SEric Saxe } 2210e751525SEric Saxe 2220e751525SEric Saxe static int 2230e751525SEric Saxe cpupm_throttle_init(cpu_t *cp) 2240e751525SEric Saxe { 2250e751525SEric Saxe cpupm_mach_state_t *mach_state = 2260e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 2270e751525SEric Saxe cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 2280e751525SEric Saxe cpu_acpi_ptc_t *ptc_stat; 22900f97612SMark Haywood int ret; 2300e751525SEric Saxe 23100f97612SMark Haywood if ((ret = cpu_acpi_cache_tstate_data(handle)) != 0) { 23200f97612SMark Haywood if (ret < 0) 23300f97612SMark Haywood cmn_err(CE_NOTE, 23400f97612SMark Haywood "!Support for CPU throttling is being " 23500f97612SMark Haywood "disabled due to errors parsing ACPI T-state " 23600f97612SMark Haywood "objects exported by BIOS."); 2370e751525SEric Saxe cpupm_throttle_fini(cp); 2380e751525SEric Saxe return (THROTTLE_RET_INCOMPLETE_DATA); 2390e751525SEric Saxe } 2400e751525SEric Saxe 2410e751525SEric Saxe /* 2420e751525SEric Saxe * Check the address space used for transitions 2430e751525SEric Saxe */ 2440e751525SEric Saxe ptc_stat = CPU_ACPI_PTC_STATUS(handle); 2450e751525SEric Saxe switch (ptc_stat->cr_addrspace_id) { 2460e751525SEric Saxe case ACPI_ADR_SPACE_FIXED_HARDWARE: 2470e751525SEric Saxe CTDEBUG(("T-State transitions will use fixed hardware\n")); 2480e751525SEric Saxe break; 2490e751525SEric Saxe case ACPI_ADR_SPACE_SYSTEM_IO: 2500e751525SEric Saxe CTDEBUG(("T-State transitions will use System IO\n")); 2510e751525SEric Saxe break; 2520e751525SEric Saxe default: 25300f97612SMark Haywood cmn_err(CE_NOTE, "!_PTC configured for unsupported " 2540e751525SEric Saxe "address space type = %d.", ptc_stat->cr_addrspace_id); 2550e751525SEric Saxe return (THROTTLE_RET_INCOMPLETE_DATA); 2560e751525SEric Saxe } 2570e751525SEric Saxe 2580e751525SEric Saxe cpupm_alloc_domains(cp, CPUPM_T_STATES); 2590e751525SEric Saxe 2600e751525SEric Saxe return (THROTTLE_RET_SUCCESS); 2610e751525SEric Saxe } 2620e751525SEric Saxe 2630e751525SEric Saxe static void 2640e751525SEric Saxe cpupm_throttle_fini(cpu_t *cp) 2650e751525SEric Saxe { 2660e751525SEric Saxe cpupm_mach_state_t *mach_state = 2670e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 2680e751525SEric Saxe cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 2690e751525SEric Saxe 2700e751525SEric Saxe cpupm_free_domains(&cpupm_tstate_domains); 2710e751525SEric Saxe cpu_acpi_free_tstate_data(handle); 2720e751525SEric Saxe } 2730e751525SEric Saxe 274*444f66e7SMark Haywood static void 275*444f66e7SMark Haywood cpupm_throttle_stop(cpu_t *cp) 276*444f66e7SMark Haywood { 277*444f66e7SMark Haywood cpupm_mach_state_t *mach_state = 278*444f66e7SMark Haywood (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 279*444f66e7SMark Haywood cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; 280*444f66e7SMark Haywood 281*444f66e7SMark Haywood cpupm_remove_domains(cp, CPUPM_T_STATES, &cpupm_tstate_domains); 282*444f66e7SMark Haywood cpu_acpi_free_tstate_data(handle); 283*444f66e7SMark Haywood } 284*444f66e7SMark Haywood 2850e751525SEric Saxe /* 2860e751525SEric Saxe * This routine reads the ACPI _TPC object. It's accessed as a callback 2870e751525SEric Saxe * by the cpu driver whenever a _TPC change notification is received. 2880e751525SEric Saxe */ 2890e751525SEric Saxe static int 2900e751525SEric Saxe cpupm_throttle_get_max(processorid_t cpu_id) 2910e751525SEric Saxe { 2920e751525SEric Saxe cpu_t *cp = cpu[cpu_id]; 2930e751525SEric Saxe cpupm_mach_state_t *mach_state = 2940e751525SEric Saxe (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); 2950e751525SEric Saxe cpu_acpi_handle_t handle; 2960e751525SEric Saxe int throtl_level; 2970e751525SEric Saxe int max_throttle_lvl; 2980e751525SEric Saxe uint_t num_throtl; 2990e751525SEric Saxe 3000e751525SEric Saxe if (mach_state == NULL) { 3010e751525SEric Saxe return (-1); 3020e751525SEric Saxe } 3030e751525SEric Saxe 3040e751525SEric Saxe handle = mach_state->ms_acpi_handle; 3050e751525SEric Saxe ASSERT(handle != NULL); 3060e751525SEric Saxe 3070e751525SEric Saxe cpu_acpi_cache_tpc(handle); 3080e751525SEric Saxe throtl_level = CPU_ACPI_TPC(handle); 3090e751525SEric Saxe 3100e751525SEric Saxe num_throtl = CPU_ACPI_TSTATES_COUNT(handle); 3110e751525SEric Saxe 3120e751525SEric Saxe max_throttle_lvl = num_throtl - 1; 3130e751525SEric Saxe if ((throtl_level < 0) || (throtl_level > max_throttle_lvl)) { 3140e751525SEric Saxe cmn_err(CE_NOTE, "!cpupm_throttle_get_max: CPU %d: " 3150e751525SEric Saxe "_TPC out of range %d", cp->cpu_id, throtl_level); 3160e751525SEric Saxe throtl_level = 0; 3170e751525SEric Saxe } 3180e751525SEric Saxe 3190e751525SEric Saxe return (throtl_level); 3200e751525SEric Saxe } 3210e751525SEric Saxe 3220e751525SEric Saxe /* 3230e751525SEric Saxe * Take care of CPU throttling when _TPC notification arrives 3240e751525SEric Saxe */ 3250e751525SEric Saxe void 3260e751525SEric Saxe cpupm_throttle_manage_notification(void *ctx) 3270e751525SEric Saxe { 3280e751525SEric Saxe cpu_t *cp = ctx; 3290e751525SEric Saxe processorid_t cpu_id = cp->cpu_id; 3300e751525SEric Saxe cpupm_mach_state_t *mach_state = 3310e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; 3320e751525SEric Saxe boolean_t is_ready; 3330e751525SEric Saxe int new_level; 3340e751525SEric Saxe 3350e751525SEric Saxe if (mach_state == NULL) { 3360e751525SEric Saxe return; 3370e751525SEric Saxe } 3380e751525SEric Saxe 3390e751525SEric Saxe /* 3400e751525SEric Saxe * We currently refuse to power-manage if the CPU is not ready to 3410e751525SEric Saxe * take cross calls (cross calls fail silently if CPU is not ready 3420e751525SEric Saxe * for it). 3430e751525SEric Saxe * 344*444f66e7SMark Haywood * Additionally, for x86 platforms we cannot power-manage an instance, 345*444f66e7SMark Haywood * until it has been initialized. 3460e751525SEric Saxe */ 347*444f66e7SMark Haywood is_ready = (cp->cpu_flags & CPU_READY) && cpupm_throttle_ready(cp); 3480e751525SEric Saxe if (!is_ready) 3490e751525SEric Saxe return; 3500e751525SEric Saxe 3510e751525SEric Saxe if (!(mach_state->ms_caps & CPUPM_T_STATES)) 3520e751525SEric Saxe return; 3530e751525SEric Saxe ASSERT(mach_state->ms_tstate.cma_ops != NULL); 3540e751525SEric Saxe 3550e751525SEric Saxe /* 3560e751525SEric Saxe * Get the new T-State support level 3570e751525SEric Saxe */ 3580e751525SEric Saxe new_level = cpupm_throttle_get_max(cpu_id); 3590e751525SEric Saxe 3600e751525SEric Saxe cpupm_state_change(cp, new_level, CPUPM_T_STATES); 3610e751525SEric Saxe } 362