/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include static int cpupm_throttle_init(cpu_t *); static void cpupm_throttle_fini(cpu_t *); static void cpupm_throttle(cpuset_t, uint32_t); cpupm_state_ops_t cpupm_throttle_ops = { "Generic ACPI T-state Support", cpupm_throttle_init, cpupm_throttle_fini, cpupm_throttle }; /* * Error returns */ #define THROTTLE_RET_SUCCESS 0x00 #define THROTTLE_RET_INCOMPLETE_DATA 0x01 #define THROTTLE_RET_UNSUP_STATE 0x02 #define THROTTLE_RET_TRANS_INCOMPLETE 0x03 #define THROTTLE_LATENCY_WAIT 1 /* * MSR register for clock modulation */ #define IA32_CLOCK_MODULATION_MSR 0x19A /* * Debugging support */ #ifdef DEBUG volatile int cpupm_throttle_debug = 0; #define CTDEBUG(arglist) if (cpupm_throttle_debug) printf arglist; #else #define CTDEBUG(arglist) #endif /* * Write the _PTC ctrl register. How it is written, depends upon the _PTC * APCI object value. */ static int write_ctrl(cpu_acpi_handle_t handle, uint32_t ctrl) { cpu_acpi_ptc_t *ptc_ctrl; uint64_t reg; int ret = 0; ptc_ctrl = CPU_ACPI_PTC_CTRL(handle); switch (ptc_ctrl->cr_addrspace_id) { case ACPI_ADR_SPACE_FIXED_HARDWARE: /* * Read current thermal state because reserved bits must be * preserved, compose new value, and write it.The writable * bits are 4:1 (1 to 4). * Bits 3:1 => On-Demand Clock Modulation Duty Cycle * Bit 4 => On-Demand Clock Modulation Enable * Left shift ctrl by 1 to allign with bits 1-4 of MSR */ reg = rdmsr(IA32_CLOCK_MODULATION_MSR); reg &= ~((uint64_t)0x1E); reg |= ctrl; wrmsr(IA32_CLOCK_MODULATION_MSR, reg); break; case ACPI_ADR_SPACE_SYSTEM_IO: ret = cpu_acpi_write_port(ptc_ctrl->cr_address, ctrl, ptc_ctrl->cr_width); break; default: DTRACE_PROBE1(throttle_ctrl_unsupported_type, uint8_t, ptc_ctrl->cr_addrspace_id); ret = -1; } DTRACE_PROBE1(throttle_ctrl_write, uint32_t, ctrl); DTRACE_PROBE1(throttle_ctrl_write_err, int, ret); return (ret); } static int read_status(cpu_acpi_handle_t handle, uint32_t *stat) { cpu_acpi_ptc_t *ptc_stat; uint64_t reg; int ret = 0; ptc_stat = CPU_ACPI_PTC_STATUS(handle); switch (ptc_stat->cr_addrspace_id) { case ACPI_ADR_SPACE_FIXED_HARDWARE: reg = rdmsr(IA32_CLOCK_MODULATION_MSR); *stat = reg & 0x1E; ret = 0; break; case ACPI_ADR_SPACE_SYSTEM_IO: ret = cpu_acpi_read_port(ptc_stat->cr_address, stat, ptc_stat->cr_width); break; default: DTRACE_PROBE1(throttle_status_unsupported_type, uint8_t, ptc_stat->cr_addrspace_id); return (-1); } DTRACE_PROBE1(throttle_status_read, uint32_t, *stat); DTRACE_PROBE1(throttle_status_read_err, int, ret); return (ret); } /* * Transition the current processor to the requested throttling state. */ static void cpupm_tstate_transition(uint32_t req_state) { cpupm_mach_state_t *mach_state = (cpupm_mach_state_t *)CPU->cpu_m.mcpu_pm_mach_state; cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; cpu_acpi_tstate_t *req_tstate; uint32_t ctrl; uint32_t stat; int i; req_tstate = (cpu_acpi_tstate_t *)CPU_ACPI_TSTATES(handle); req_tstate += req_state; DTRACE_PROBE1(throttle_transition, uint32_t, CPU_ACPI_FREQPER(req_tstate)); /* * Initiate the processor t-state change. */ ctrl = CPU_ACPI_TSTATE_CTRL(req_tstate); if (write_ctrl(handle, ctrl) != 0) { return; } /* * If status is zero, then transition is synchronous and * no status value comparison is required. */ if (CPU_ACPI_TSTATE_STAT(req_tstate) == 0) { return; } /* Wait until switch is complete, but bound the loop just in case. */ for (i = CPU_ACPI_TSTATE_TRANSLAT(req_tstate) * 2; i >= 0; i -= THROTTLE_LATENCY_WAIT) { if (read_status(handle, &stat) == 0 && CPU_ACPI_TSTATE_STAT(req_tstate) == stat) break; drv_usecwait(THROTTLE_LATENCY_WAIT); } if (CPU_ACPI_TSTATE_STAT(req_tstate) != stat) { DTRACE_PROBE(throttle_transition_incomplete); } } static void cpupm_throttle(cpuset_t set, uint32_t throtl_lvl) { /* * If thread is already running on target CPU then just * make the transition request. Otherwise, we'll need to * make a cross-call. */ kpreempt_disable(); if (CPU_IN_SET(set, CPU->cpu_id)) { cpupm_tstate_transition(throtl_lvl); CPUSET_DEL(set, CPU->cpu_id); } if (!CPUSET_ISNULL(set)) { xc_call((xc_arg_t)throtl_lvl, NULL, NULL, CPUSET2BV(set), (xc_func_t)cpupm_tstate_transition); } kpreempt_enable(); } static int cpupm_throttle_init(cpu_t *cp) { cpupm_mach_state_t *mach_state = (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; cpu_acpi_ptc_t *ptc_stat; if (cpu_acpi_cache_tstate_data(handle) != 0) { CTDEBUG(("Failed to cache T-state ACPI data\n")); cpupm_throttle_fini(cp); return (THROTTLE_RET_INCOMPLETE_DATA); } /* * Check the address space used for transitions */ ptc_stat = CPU_ACPI_PTC_STATUS(handle); switch (ptc_stat->cr_addrspace_id) { case ACPI_ADR_SPACE_FIXED_HARDWARE: CTDEBUG(("T-State transitions will use fixed hardware\n")); break; case ACPI_ADR_SPACE_SYSTEM_IO: CTDEBUG(("T-State transitions will use System IO\n")); break; default: cmn_err(CE_WARN, "!_PTC conifgured for unsupported " "address space type = %d.", ptc_stat->cr_addrspace_id); return (THROTTLE_RET_INCOMPLETE_DATA); } cpupm_alloc_domains(cp, CPUPM_T_STATES); return (THROTTLE_RET_SUCCESS); } static void cpupm_throttle_fini(cpu_t *cp) { cpupm_mach_state_t *mach_state = (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; cpu_acpi_handle_t handle = mach_state->ms_acpi_handle; cpupm_free_domains(&cpupm_tstate_domains); cpu_acpi_free_tstate_data(handle); } /* * This routine reads the ACPI _TPC object. It's accessed as a callback * by the cpu driver whenever a _TPC change notification is received. */ static int cpupm_throttle_get_max(processorid_t cpu_id) { cpu_t *cp = cpu[cpu_id]; cpupm_mach_state_t *mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state); cpu_acpi_handle_t handle; int throtl_level; int max_throttle_lvl; uint_t num_throtl; if (mach_state == NULL) { return (-1); } handle = mach_state->ms_acpi_handle; ASSERT(handle != NULL); cpu_acpi_cache_tpc(handle); throtl_level = CPU_ACPI_TPC(handle); num_throtl = CPU_ACPI_TSTATES_COUNT(handle); max_throttle_lvl = num_throtl - 1; if ((throtl_level < 0) || (throtl_level > max_throttle_lvl)) { cmn_err(CE_NOTE, "!cpupm_throttle_get_max: CPU %d: " "_TPC out of range %d", cp->cpu_id, throtl_level); throtl_level = 0; } return (throtl_level); } /* * Take care of CPU throttling when _TPC notification arrives */ void cpupm_throttle_manage_notification(void *ctx) { cpu_t *cp = ctx; processorid_t cpu_id = cp->cpu_id; cpupm_mach_state_t *mach_state = (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state; boolean_t is_ready; int new_level; if (mach_state == NULL) { return; } /* * We currently refuse to power-manage if the CPU is not ready to * take cross calls (cross calls fail silently if CPU is not ready * for it). * * Additionally, for x86 platforms we cannot power-manage * any one instance, until all instances have been initialized. * That's because we don't know what the CPU domains look like * until all instances have been initialized. */ is_ready = (cp->cpu_flags & CPU_READY) && cpupm_throttle_ready(); if (!is_ready) return; if (!(mach_state->ms_caps & CPUPM_T_STATES)) return; ASSERT(mach_state->ms_tstate.cma_ops != NULL); /* * Get the new T-State support level */ new_level = cpupm_throttle_get_max(cpu_id); cpupm_state_change(cp, new_level, CPUPM_T_STATES); }