1b47b5b34SRafael Vanoni /* 2b47b5b34SRafael Vanoni * Copyright 2009, Intel Corporation 3b47b5b34SRafael Vanoni * Copyright 2009, Sun Microsystems, Inc 4b47b5b34SRafael Vanoni * 5b47b5b34SRafael Vanoni * This file is part of PowerTOP 6b47b5b34SRafael Vanoni * 7b47b5b34SRafael Vanoni * This program file is free software; you can redistribute it and/or modify it 8b47b5b34SRafael Vanoni * under the terms of the GNU General Public License as published by the 9b47b5b34SRafael Vanoni * Free Software Foundation; version 2 of the License. 10b47b5b34SRafael Vanoni * 11b47b5b34SRafael Vanoni * This program is distributed in the hope that it will be useful, but WITHOUT 12b47b5b34SRafael Vanoni * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13b47b5b34SRafael Vanoni * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14b47b5b34SRafael Vanoni * for more details. 15b47b5b34SRafael Vanoni * 16b47b5b34SRafael Vanoni * You should have received a copy of the GNU General Public License 17b47b5b34SRafael Vanoni * along with this program in a file named COPYING; if not, write to the 18b47b5b34SRafael Vanoni * Free Software Foundation, Inc., 19b47b5b34SRafael Vanoni * 51 Franklin Street, Fifth Floor, 20b47b5b34SRafael Vanoni * Boston, MA 02110-1301 USA 21b47b5b34SRafael Vanoni * 22b47b5b34SRafael Vanoni * Authors: 23b47b5b34SRafael Vanoni * Arjan van de Ven <arjan@linux.intel.com> 24b47b5b34SRafael Vanoni * Eric C Saxe <eric.saxe@sun.com> 25b47b5b34SRafael Vanoni * Aubrey Li <aubrey.li@intel.com> 26b47b5b34SRafael Vanoni */ 27b47b5b34SRafael Vanoni 28b47b5b34SRafael Vanoni /* 29b47b5b34SRafael Vanoni * GPL Disclaimer 30b47b5b34SRafael Vanoni * 31b47b5b34SRafael Vanoni * For the avoidance of doubt, except that if any license choice other 32b47b5b34SRafael Vanoni * than GPL or LGPL is available it will apply instead, Sun elects to 33b47b5b34SRafael Vanoni * use only the General Public License version 2 (GPLv2) at this time 34b47b5b34SRafael Vanoni * for any software where a choice of GPL license versions is made 35b47b5b34SRafael Vanoni * available with the language indicating that GPLv2 or any later 36b47b5b34SRafael Vanoni * version may be used, or where a choice of which version of the GPL 37b47b5b34SRafael Vanoni * is applied is otherwise unspecified. 38b47b5b34SRafael Vanoni */ 39b47b5b34SRafael Vanoni 40b47b5b34SRafael Vanoni #include <stdlib.h> 41b47b5b34SRafael Vanoni #include <string.h> 42b47b5b34SRafael Vanoni #include <dtrace.h> 43b47b5b34SRafael Vanoni #include <kstat.h> 44b47b5b34SRafael Vanoni #include <errno.h> 45b47b5b34SRafael Vanoni #include "powertop.h" 46b47b5b34SRafael Vanoni 47636423dbSRafael Vanoni #define HZ2MHZ(speed) ((speed) / MICROSEC) 48b47b5b34SRafael Vanoni #define DTP_ARG_COUNT 2 49b47b5b34SRafael Vanoni #define DTP_ARG_LENGTH 5 50b47b5b34SRafael Vanoni 51b47b5b34SRafael Vanoni static uint64_t max_cpufreq = 0; 52b47b5b34SRafael Vanoni static dtrace_hdl_t *dtp; 53b47b5b34SRafael Vanoni static char **dtp_argv; 54b47b5b34SRafael Vanoni 55b47b5b34SRafael Vanoni /* 56b47b5b34SRafael Vanoni * Enabling PM through /etc/power.conf 57*9bbf5ba1SRafael Vanoni * See pt_cpufreq_suggest() 58b47b5b34SRafael Vanoni */ 59b47b5b34SRafael Vanoni static char default_conf[] = "/etc/power.conf"; 60b47b5b34SRafael Vanoni static char default_pmconf[] = "/usr/sbin/pmconfig"; 61b47b5b34SRafael Vanoni static char cpupm_enable[] = "echo cpupm enable >> /etc/power.conf"; 62b47b5b34SRafael Vanoni static char cpupm_treshold[] = "echo cpu-threshold 1s >> /etc/power.conf"; 63b47b5b34SRafael Vanoni 64b47b5b34SRafael Vanoni /* 65b47b5b34SRafael Vanoni * Buffer containing DTrace program to track CPU frequency transitions 66b47b5b34SRafael Vanoni */ 67b47b5b34SRafael Vanoni static const char *dtp_cpufreq = 68b47b5b34SRafael Vanoni "hrtime_t last[$0];" 69b47b5b34SRafael Vanoni "" 70b47b5b34SRafael Vanoni "BEGIN" 71b47b5b34SRafael Vanoni "{" 72b47b5b34SRafael Vanoni " begin = timestamp;" 73b47b5b34SRafael Vanoni "}" 74b47b5b34SRafael Vanoni "" 75b47b5b34SRafael Vanoni ":::cpu-change-speed" 76b47b5b34SRafael Vanoni "/last[(processorid_t)arg0] != 0/" 77b47b5b34SRafael Vanoni "{" 78b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 79636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 80b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);" 81b47b5b34SRafael Vanoni " last[this->cpu] = timestamp;" 82b47b5b34SRafael Vanoni "}" 83b47b5b34SRafael Vanoni ":::cpu-change-speed" 84b47b5b34SRafael Vanoni "/last[(processorid_t)arg0] == 0/" 85b47b5b34SRafael Vanoni "{" 86b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 87636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 88b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - begin);" 89b47b5b34SRafael Vanoni " last[this->cpu] = timestamp;" 90b47b5b34SRafael Vanoni "}"; 91b47b5b34SRafael Vanoni 92b47b5b34SRafael Vanoni /* 93b47b5b34SRafael Vanoni * Same as above, but only for a specific CPU 94b47b5b34SRafael Vanoni */ 95b47b5b34SRafael Vanoni static const char *dtp_cpufreq_c = 96b47b5b34SRafael Vanoni "hrtime_t last;" 97b47b5b34SRafael Vanoni "" 98b47b5b34SRafael Vanoni "BEGIN" 99b47b5b34SRafael Vanoni "{" 100b47b5b34SRafael Vanoni " begin = timestamp;" 101b47b5b34SRafael Vanoni "}" 102b47b5b34SRafael Vanoni "" 103b47b5b34SRafael Vanoni ":::cpu-change-speed" 104b47b5b34SRafael Vanoni "/(processorid_t)arg0 == $1 &&" 105b47b5b34SRafael Vanoni " last != 0/" 106b47b5b34SRafael Vanoni "{" 107b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 108636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 109b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - last);" 110b47b5b34SRafael Vanoni " last = timestamp;" 111b47b5b34SRafael Vanoni "}" 112b47b5b34SRafael Vanoni ":::cpu-change-speed" 113b47b5b34SRafael Vanoni "/(processorid_t)arg0 == $1 &&" 114b47b5b34SRafael Vanoni " last == 0/" 115b47b5b34SRafael Vanoni "{" 116b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 117636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 118b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - begin);" 119b47b5b34SRafael Vanoni " last = timestamp;" 120b47b5b34SRafael Vanoni "}"; 121b47b5b34SRafael Vanoni 122b47b5b34SRafael Vanoni static int pt_cpufreq_setup(void); 123b47b5b34SRafael Vanoni static int pt_cpufreq_snapshot(void); 124b47b5b34SRafael Vanoni static int pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *); 125b47b5b34SRafael Vanoni static void pt_cpufreq_stat_account(double, uint_t); 126*9bbf5ba1SRafael Vanoni static int pt_cpufreq_snapshot_cpu(kstat_ctl_t *, uint_t); 127*9bbf5ba1SRafael Vanoni static int pt_cpufreq_check_pm(void); 128*9bbf5ba1SRafael Vanoni static void pt_cpufreq_enable(void); 129b47b5b34SRafael Vanoni 130b47b5b34SRafael Vanoni static int 131b47b5b34SRafael Vanoni pt_cpufreq_setup(void) 132b47b5b34SRafael Vanoni { 133b47b5b34SRafael Vanoni if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL) 134b47b5b34SRafael Vanoni return (EXIT_FAILURE); 135b47b5b34SRafael Vanoni 136b47b5b34SRafael Vanoni if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) { 137b47b5b34SRafael Vanoni free(dtp_argv); 138b47b5b34SRafael Vanoni return (EXIT_FAILURE); 139b47b5b34SRafael Vanoni } 140b47b5b34SRafael Vanoni 141b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed); 142b47b5b34SRafael Vanoni 143636423dbSRafael Vanoni if (PT_ON_CPU) { 144b47b5b34SRafael Vanoni if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH)) 145b47b5b34SRafael Vanoni == NULL) { 146b47b5b34SRafael Vanoni free(dtp_argv[0]); 147b47b5b34SRafael Vanoni free(dtp_argv); 148b47b5b34SRafael Vanoni return (EXIT_FAILURE); 149b47b5b34SRafael Vanoni } 150b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu); 151b47b5b34SRafael Vanoni } 152b47b5b34SRafael Vanoni 153b47b5b34SRafael Vanoni return (0); 154b47b5b34SRafael Vanoni } 155b47b5b34SRafael Vanoni 156b47b5b34SRafael Vanoni /* 157b47b5b34SRafael Vanoni * Perform setup necessary to enumerate and track CPU speed changes 158b47b5b34SRafael Vanoni */ 159b47b5b34SRafael Vanoni int 160b47b5b34SRafael Vanoni pt_cpufreq_stat_prepare(void) 161b47b5b34SRafael Vanoni { 162b47b5b34SRafael Vanoni dtrace_prog_t *prog; 163b47b5b34SRafael Vanoni dtrace_proginfo_t info; 164b47b5b34SRafael Vanoni dtrace_optval_t statustime; 165b47b5b34SRafael Vanoni kstat_ctl_t *kc; 166b47b5b34SRafael Vanoni kstat_t *ksp; 167b47b5b34SRafael Vanoni kstat_named_t *knp; 168b47b5b34SRafael Vanoni freq_state_info_t *state; 169b47b5b34SRafael Vanoni char *s, *token, *prog_ptr; 170b47b5b34SRafael Vanoni int err; 171b47b5b34SRafael Vanoni 172b47b5b34SRafael Vanoni if ((err = pt_cpufreq_setup()) != 0) { 173b47b5b34SRafael Vanoni pt_error("%s : failed to setup", __FILE__); 174b47b5b34SRafael Vanoni return (errno); 175b47b5b34SRafael Vanoni } 176b47b5b34SRafael Vanoni 177b47b5b34SRafael Vanoni state = g_pstate_info; 178b47b5b34SRafael Vanoni if ((g_cpu_power_states = calloc((size_t)g_ncpus, 179b47b5b34SRafael Vanoni sizeof (cpu_power_info_t))) == NULL) 180b47b5b34SRafael Vanoni return (-1); 181b47b5b34SRafael Vanoni 182b47b5b34SRafael Vanoni /* 183b47b5b34SRafael Vanoni * Enumerate the CPU frequencies 184b47b5b34SRafael Vanoni */ 185b47b5b34SRafael Vanoni if ((kc = kstat_open()) == NULL) 186b47b5b34SRafael Vanoni return (errno); 187b47b5b34SRafael Vanoni 188b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL); 189b47b5b34SRafael Vanoni 190b47b5b34SRafael Vanoni if (ksp == NULL) { 191b47b5b34SRafael Vanoni err = errno; 192b47b5b34SRafael Vanoni (void) kstat_close(kc); 193b47b5b34SRafael Vanoni return (err); 194b47b5b34SRafael Vanoni } 195b47b5b34SRafael Vanoni 196b47b5b34SRafael Vanoni (void) kstat_read(kc, ksp, NULL); 197b47b5b34SRafael Vanoni 198b47b5b34SRafael Vanoni knp = kstat_data_lookup(ksp, "supported_frequencies_Hz"); 199b47b5b34SRafael Vanoni s = knp->value.str.addr.ptr; 200b47b5b34SRafael Vanoni 201b47b5b34SRafael Vanoni g_npstates = 0; 202b47b5b34SRafael Vanoni 203b47b5b34SRafael Vanoni for (token = strtok(s, ":"), s = NULL; 204b47b5b34SRafael Vanoni NULL != token && g_npstates < NSTATES; 205b47b5b34SRafael Vanoni token = strtok(NULL, ":")) { 206b47b5b34SRafael Vanoni 207b47b5b34SRafael Vanoni state->speed = HZ2MHZ(atoll(token)); 208b47b5b34SRafael Vanoni 209b47b5b34SRafael Vanoni if (state->speed > max_cpufreq) 210b47b5b34SRafael Vanoni max_cpufreq = state->speed; 211b47b5b34SRafael Vanoni 212b47b5b34SRafael Vanoni state->total_time = (uint64_t)0; 213b47b5b34SRafael Vanoni 214b47b5b34SRafael Vanoni g_npstates++; 215b47b5b34SRafael Vanoni state++; 216b47b5b34SRafael Vanoni } 217b47b5b34SRafael Vanoni 218b47b5b34SRafael Vanoni if (token != NULL) 219b47b5b34SRafael Vanoni pt_error("%s : exceeded NSTATES\n", __FILE__); 220b47b5b34SRafael Vanoni 221b47b5b34SRafael Vanoni (void) kstat_close(kc); 222b47b5b34SRafael Vanoni 223b47b5b34SRafael Vanoni /* 224b47b5b34SRafael Vanoni * Return if speed transition is not supported 225b47b5b34SRafael Vanoni */ 226b47b5b34SRafael Vanoni if (g_npstates < 2) 227b47b5b34SRafael Vanoni return (-1); 228b47b5b34SRafael Vanoni 229b47b5b34SRafael Vanoni /* 230b47b5b34SRafael Vanoni * Setup DTrace to look for CPU frequency changes 231b47b5b34SRafael Vanoni */ 232b47b5b34SRafael Vanoni if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) { 233b47b5b34SRafael Vanoni pt_error("%s : cannot open dtrace library: %s\n", __FILE__, 234b47b5b34SRafael Vanoni dtrace_errmsg(NULL, err)); 235b47b5b34SRafael Vanoni return (-2); 236b47b5b34SRafael Vanoni } 237b47b5b34SRafael Vanoni 238b47b5b34SRafael Vanoni /* 239b47b5b34SRafael Vanoni * Execute different scripts (defined above) depending on 240b47b5b34SRafael Vanoni * user specified options. Default mode uses dtp_cpufreq. 241b47b5b34SRafael Vanoni */ 242636423dbSRafael Vanoni if (PT_ON_CPU) 243b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq_c; 244b47b5b34SRafael Vanoni else 245b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq; 246b47b5b34SRafael Vanoni 247b47b5b34SRafael Vanoni if ((prog = dtrace_program_strcompile(dtp, prog_ptr, 248b47b5b34SRafael Vanoni DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) { 249b47b5b34SRafael Vanoni pt_error("%s : cpu-change-speed probe unavailable\n", __FILE__); 250b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 251b47b5b34SRafael Vanoni } 252b47b5b34SRafael Vanoni 253b47b5b34SRafael Vanoni if (dtrace_program_exec(dtp, prog, &info) == -1) { 254b47b5b34SRafael Vanoni pt_error("%s : failed to enable speed probe\n", __FILE__); 255b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 256b47b5b34SRafael Vanoni } 257b47b5b34SRafael Vanoni 258b47b5b34SRafael Vanoni if (dtrace_setopt(dtp, "aggsize", "128k") == -1) { 259b47b5b34SRafael Vanoni pt_error("%s : failed to set speed 'aggsize'\n", __FILE__); 260b47b5b34SRafael Vanoni } 261b47b5b34SRafael Vanoni 262b47b5b34SRafael Vanoni if (dtrace_setopt(dtp, "aggrate", "0") == -1) { 263b47b5b34SRafael Vanoni pt_error("%s : failed to set speed 'aggrate'\n", __FILE__); 264b47b5b34SRafael Vanoni } 265b47b5b34SRafael Vanoni 266b47b5b34SRafael Vanoni if (dtrace_setopt(dtp, "aggpercpu", 0) == -1) { 267b47b5b34SRafael Vanoni pt_error("%s : failed to set speed 'aggpercpu'\n", __FILE__); 268b47b5b34SRafael Vanoni } 269b47b5b34SRafael Vanoni 270b47b5b34SRafael Vanoni if (dtrace_go(dtp) != 0) { 271b47b5b34SRafael Vanoni pt_error("%s : failed to start speed observation", __FILE__); 272b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 273b47b5b34SRafael Vanoni } 274b47b5b34SRafael Vanoni 275b47b5b34SRafael Vanoni if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) { 276b47b5b34SRafael Vanoni pt_error("%s : failed to get speed 'statusrate'\n", __FILE__); 277b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 278b47b5b34SRafael Vanoni } 279b47b5b34SRafael Vanoni 280b47b5b34SRafael Vanoni return (0); 281b47b5b34SRafael Vanoni } 282b47b5b34SRafael Vanoni 283b47b5b34SRafael Vanoni /* 284b47b5b34SRafael Vanoni * The DTrace probes have already been enabled, and are tracking 285b47b5b34SRafael Vanoni * CPU speed transitions. Take a snapshot of the aggregations, and 286b47b5b34SRafael Vanoni * look for any CPUs that have made a speed transition over the last 287b47b5b34SRafael Vanoni * sampling interval. Note that the aggregations may be empty if no 288b47b5b34SRafael Vanoni * speed transitions took place over the last interval. In that case, 289b47b5b34SRafael Vanoni * notate that we have already accounted for the time, so that when 290b47b5b34SRafael Vanoni * we do encounter a speed transition in a future sampling interval 291b47b5b34SRafael Vanoni * we can subtract that time back out. 292b47b5b34SRafael Vanoni */ 293b47b5b34SRafael Vanoni int 294b47b5b34SRafael Vanoni pt_cpufreq_stat_collect(double interval) 295b47b5b34SRafael Vanoni { 296b47b5b34SRafael Vanoni int i, ret; 297b47b5b34SRafael Vanoni 298b47b5b34SRafael Vanoni /* 299b47b5b34SRafael Vanoni * Zero out the interval time reported by DTrace for 300b47b5b34SRafael Vanoni * this interval 301b47b5b34SRafael Vanoni */ 302b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) 303b47b5b34SRafael Vanoni g_pstate_info[i].total_time = 0; 304b47b5b34SRafael Vanoni 305b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus; i++) 306b47b5b34SRafael Vanoni g_cpu_power_states[i].dtrace_time = 0; 307b47b5b34SRafael Vanoni 308b47b5b34SRafael Vanoni if (dtrace_status(dtp) == -1) 309b47b5b34SRafael Vanoni return (-1); 310b47b5b34SRafael Vanoni 311b47b5b34SRafael Vanoni if (dtrace_aggregate_snap(dtp) != 0) 312b47b5b34SRafael Vanoni pt_error("%s : failed to add to stats aggregation", __FILE__); 313b47b5b34SRafael Vanoni 314b47b5b34SRafael Vanoni if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk, 315b47b5b34SRafael Vanoni NULL) != 0) 316b47b5b34SRafael Vanoni pt_error("%s : failed to sort stats aggregation", __FILE__); 317b47b5b34SRafael Vanoni 318b47b5b34SRafael Vanoni dtrace_aggregate_clear(dtp); 319b47b5b34SRafael Vanoni 320b47b5b34SRafael Vanoni if ((ret = pt_cpufreq_snapshot()) != 0) { 321b47b5b34SRafael Vanoni pt_error("%s : failed to add to stats aggregation", __FILE__); 322b47b5b34SRafael Vanoni return (ret); 323b47b5b34SRafael Vanoni } 324b47b5b34SRafael Vanoni 325b47b5b34SRafael Vanoni switch (g_op_mode) { 326636423dbSRafael Vanoni case PT_MODE_CPU: 327b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, g_observed_cpu); 328b47b5b34SRafael Vanoni break; 329636423dbSRafael Vanoni case PT_MODE_DEFAULT: 330b47b5b34SRafael Vanoni default: 331b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++) 332b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, i); 333b47b5b34SRafael Vanoni break; 334b47b5b34SRafael Vanoni } 335b47b5b34SRafael Vanoni 336b47b5b34SRafael Vanoni return (0); 337b47b5b34SRafael Vanoni } 338b47b5b34SRafael Vanoni 339b47b5b34SRafael Vanoni static void 340b47b5b34SRafael Vanoni pt_cpufreq_stat_account(double interval, uint_t cpu) 341b47b5b34SRafael Vanoni { 342636423dbSRafael Vanoni cpu_power_info_t *cpu_pow; 343b47b5b34SRafael Vanoni uint64_t speed; 344b47b5b34SRafael Vanoni hrtime_t duration; 345b47b5b34SRafael Vanoni int i; 346b47b5b34SRafael Vanoni 347b47b5b34SRafael Vanoni cpu_pow = &g_cpu_power_states[cpu]; 348b47b5b34SRafael Vanoni speed = cpu_pow->current_pstate; 349b47b5b34SRafael Vanoni 350636423dbSRafael Vanoni duration = (hrtime_t)(interval * NANOSEC) - cpu_pow->dtrace_time; 351636423dbSRafael Vanoni 352636423dbSRafael Vanoni /* 353636423dbSRafael Vanoni * 'duration' may be a negative value when we're using or forcing a 354636423dbSRafael Vanoni * small interval, and the amount of time already accounted ends up 355636423dbSRafael Vanoni * being larger than the the former. 356636423dbSRafael Vanoni */ 357636423dbSRafael Vanoni if (duration < 0) 358636423dbSRafael Vanoni return; 359b47b5b34SRafael Vanoni 360b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) { 361b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) { 362b47b5b34SRafael Vanoni g_pstate_info[i].total_time += duration; 363b47b5b34SRafael Vanoni cpu_pow->time_accounted += duration; 364636423dbSRafael Vanoni cpu_pow->speed_accounted = speed; 365b47b5b34SRafael Vanoni } 366b47b5b34SRafael Vanoni } 367b47b5b34SRafael Vanoni } 368b47b5b34SRafael Vanoni 369b47b5b34SRafael Vanoni /* 370b47b5b34SRafael Vanoni * Take a snapshot of each CPU's speed by looking through the cpu_info kstats. 371b47b5b34SRafael Vanoni */ 372b47b5b34SRafael Vanoni static int 373b47b5b34SRafael Vanoni pt_cpufreq_snapshot(void) 374b47b5b34SRafael Vanoni { 375b47b5b34SRafael Vanoni kstat_ctl_t *kc; 376b47b5b34SRafael Vanoni int ret; 377b47b5b34SRafael Vanoni uint_t i; 378b47b5b34SRafael Vanoni 379b47b5b34SRafael Vanoni if ((kc = kstat_open()) == NULL) 380b47b5b34SRafael Vanoni return (errno); 381b47b5b34SRafael Vanoni 382b47b5b34SRafael Vanoni switch (g_op_mode) { 383636423dbSRafael Vanoni case PT_MODE_CPU: 384b47b5b34SRafael Vanoni ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu); 385b47b5b34SRafael Vanoni break; 386636423dbSRafael Vanoni case PT_MODE_DEFAULT: 387b47b5b34SRafael Vanoni default: 388b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++) 389b47b5b34SRafael Vanoni if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0) 390b47b5b34SRafael Vanoni break; 391b47b5b34SRafael Vanoni break; 392b47b5b34SRafael Vanoni } 393b47b5b34SRafael Vanoni 394b47b5b34SRafael Vanoni if (kstat_close(kc) != 0) 395b47b5b34SRafael Vanoni pt_error("%s : couldn't close kstat\n", __FILE__); 396b47b5b34SRafael Vanoni 397b47b5b34SRafael Vanoni return (ret); 398b47b5b34SRafael Vanoni } 399b47b5b34SRafael Vanoni 400b47b5b34SRafael Vanoni static int 401b47b5b34SRafael Vanoni pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu) 402b47b5b34SRafael Vanoni { 403b47b5b34SRafael Vanoni kstat_t *ksp; 404b47b5b34SRafael Vanoni kstat_named_t *knp; 405b47b5b34SRafael Vanoni 406b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL); 407b47b5b34SRafael Vanoni if (ksp == NULL) { 408b47b5b34SRafael Vanoni pt_error("%s : couldn't find cpu_info kstat for CPU " 409b47b5b34SRafael Vanoni "%d\n", __FILE__, cpu); 410b47b5b34SRafael Vanoni return (1); 411b47b5b34SRafael Vanoni } 412b47b5b34SRafael Vanoni 413b47b5b34SRafael Vanoni if (kstat_read(kc, ksp, NULL) == -1) { 414b47b5b34SRafael Vanoni pt_error("%s : couldn't read cpu_info kstat for " 415b47b5b34SRafael Vanoni "CPU %d\n", __FILE__, cpu); 416b47b5b34SRafael Vanoni return (2); 417b47b5b34SRafael Vanoni } 418b47b5b34SRafael Vanoni 419b47b5b34SRafael Vanoni knp = kstat_data_lookup(ksp, "current_clock_Hz"); 420b47b5b34SRafael Vanoni if (knp == NULL) { 421b47b5b34SRafael Vanoni pt_error("%s : couldn't find current_clock_Hz " 422b47b5b34SRafael Vanoni "kstat for CPU %d\n", __FILE__, cpu); 423b47b5b34SRafael Vanoni return (3); 424b47b5b34SRafael Vanoni } 425b47b5b34SRafael Vanoni 426b47b5b34SRafael Vanoni g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64); 427b47b5b34SRafael Vanoni 428b47b5b34SRafael Vanoni return (0); 429b47b5b34SRafael Vanoni } 430b47b5b34SRafael Vanoni 431b47b5b34SRafael Vanoni /* 432b47b5b34SRafael Vanoni * DTrace aggregation walker that sorts through a snapshot of the 433b47b5b34SRafael Vanoni * aggregation data collected during firings of the cpu-change-speed 434b47b5b34SRafael Vanoni * probe. 435b47b5b34SRafael Vanoni */ 436b47b5b34SRafael Vanoni /*ARGSUSED*/ 437b47b5b34SRafael Vanoni static int 438b47b5b34SRafael Vanoni pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg) 439b47b5b34SRafael Vanoni { 440b47b5b34SRafael Vanoni dtrace_aggdesc_t *aggdesc = data->dtada_desc; 441b47b5b34SRafael Vanoni dtrace_recdesc_t *cpu_rec, *speed_rec; 442636423dbSRafael Vanoni cpu_power_info_t *cp; 443b47b5b34SRafael Vanoni int32_t cpu; 444b47b5b34SRafael Vanoni uint64_t speed; 445636423dbSRafael Vanoni hrtime_t res; 446b47b5b34SRafael Vanoni int i; 447b47b5b34SRafael Vanoni 448b47b5b34SRafael Vanoni if (strcmp(aggdesc->dtagd_name, "times") == 0) { 449b47b5b34SRafael Vanoni cpu_rec = &aggdesc->dtagd_rec[1]; 450b47b5b34SRafael Vanoni speed_rec = &aggdesc->dtagd_rec[2]; 451b47b5b34SRafael Vanoni 452b47b5b34SRafael Vanoni /* LINTED - alignment */ 453b47b5b34SRafael Vanoni cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset); 454636423dbSRafael Vanoni 455636423dbSRafael Vanoni /* LINTED - alignment */ 456636423dbSRafael Vanoni res = *((hrtime_t *)(data->dtada_percpu[cpu])); 457636423dbSRafael Vanoni 458b47b5b34SRafael Vanoni /* LINTED - alignment */ 459b47b5b34SRafael Vanoni speed = *(uint64_t *)(data->dtada_data + 460b47b5b34SRafael Vanoni speed_rec->dtrd_offset); 461b47b5b34SRafael Vanoni 462636423dbSRafael Vanoni if (speed == 0) 463b47b5b34SRafael Vanoni speed = max_cpufreq; 464636423dbSRafael Vanoni else 465636423dbSRafael Vanoni speed = HZ2MHZ(speed); 466b47b5b34SRafael Vanoni 467b47b5b34SRafael Vanoni /* 468b47b5b34SRafael Vanoni * We have an aggregation record for "cpu" being at "speed" 469b47b5b34SRafael Vanoni * for an interval of "n" nanoseconds. The reported interval 470b47b5b34SRafael Vanoni * may exceed the powertop sampling interval, since we only 471b47b5b34SRafael Vanoni * notice during potentially infrequent firings of the 472b47b5b34SRafael Vanoni * "speed change" DTrace probe. In this case powertop would 473b47b5b34SRafael Vanoni * have already accounted for the portions of the interval 474b47b5b34SRafael Vanoni * that happened during prior powertop samplings, so subtract 475b47b5b34SRafael Vanoni * out time already accounted. 476b47b5b34SRafael Vanoni */ 477636423dbSRafael Vanoni cp = &g_cpu_power_states[cpu]; 478b47b5b34SRafael Vanoni 479b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) { 480b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) { 481636423dbSRafael Vanoni 482636423dbSRafael Vanoni if (cp->time_accounted > 0 && 483636423dbSRafael Vanoni cp->speed_accounted == speed) { 484636423dbSRafael Vanoni if (res > cp->time_accounted) { 485636423dbSRafael Vanoni res -= cp->time_accounted; 486636423dbSRafael Vanoni cp->time_accounted = 0; 487636423dbSRafael Vanoni cp->speed_accounted = 0; 488636423dbSRafael Vanoni } else { 489636423dbSRafael Vanoni return (DTRACE_AGGWALK_NEXT); 490b47b5b34SRafael Vanoni } 491b47b5b34SRafael Vanoni } 492636423dbSRafael Vanoni 493636423dbSRafael Vanoni g_pstate_info[i].total_time += res; 494636423dbSRafael Vanoni cp->dtrace_time += res; 495b47b5b34SRafael Vanoni } 496b47b5b34SRafael Vanoni } 497b47b5b34SRafael Vanoni } 498636423dbSRafael Vanoni 499b47b5b34SRafael Vanoni return (DTRACE_AGGWALK_NEXT); 500b47b5b34SRafael Vanoni } 501b47b5b34SRafael Vanoni 502b47b5b34SRafael Vanoni /* 503*9bbf5ba1SRafael Vanoni * Checks if PM is enabled in /etc/power.conf, enabling if not 504*9bbf5ba1SRafael Vanoni */ 505*9bbf5ba1SRafael Vanoni void 506*9bbf5ba1SRafael Vanoni pt_cpufreq_suggest(void) 507*9bbf5ba1SRafael Vanoni { 508*9bbf5ba1SRafael Vanoni int ret = pt_cpufreq_check_pm(); 509*9bbf5ba1SRafael Vanoni 510*9bbf5ba1SRafael Vanoni switch (ret) { 511*9bbf5ba1SRafael Vanoni case 0: 512*9bbf5ba1SRafael Vanoni pt_sugg_add("Suggestion: enable CPU power management by " 513*9bbf5ba1SRafael Vanoni "pressing the P key", 40, 'P', (char *)g_msg_freq_enable, 514*9bbf5ba1SRafael Vanoni pt_cpufreq_enable); 515*9bbf5ba1SRafael Vanoni break; 516*9bbf5ba1SRafael Vanoni } 517*9bbf5ba1SRafael Vanoni } 518*9bbf5ba1SRafael Vanoni 519*9bbf5ba1SRafael Vanoni /* 520*9bbf5ba1SRafael Vanoni * Checks /etc/power.conf and returns: 521*9bbf5ba1SRafael Vanoni * 522*9bbf5ba1SRafael Vanoni * 0 if CPUPM is not enabled 523*9bbf5ba1SRafael Vanoni * 1 if there's nothing for us to do because: 524*9bbf5ba1SRafael Vanoni * (a) the system does not support frequency scaling 525*9bbf5ba1SRafael Vanoni * (b) there's no power.conf. 526*9bbf5ba1SRafael Vanoni * 2 if CPUPM is enabled 527*9bbf5ba1SRafael Vanoni * 3 if the system is running in poll-mode, as opposed to event-mode 528*9bbf5ba1SRafael Vanoni * 529*9bbf5ba1SRafael Vanoni * Notice the ordering of the return values, they will be picked up and 530*9bbf5ba1SRafael Vanoni * switched upon ascendingly. 531*9bbf5ba1SRafael Vanoni */ 532*9bbf5ba1SRafael Vanoni static int 533*9bbf5ba1SRafael Vanoni pt_cpufreq_check_pm(void) 534*9bbf5ba1SRafael Vanoni { 535*9bbf5ba1SRafael Vanoni char line[1024]; 536*9bbf5ba1SRafael Vanoni FILE *file; 537*9bbf5ba1SRafael Vanoni int ret = 0; 538*9bbf5ba1SRafael Vanoni 539*9bbf5ba1SRafael Vanoni if (g_npstates < 2 || (file = fopen(default_conf, "r")) == NULL) 540*9bbf5ba1SRafael Vanoni return (1); 541*9bbf5ba1SRafael Vanoni 542*9bbf5ba1SRafael Vanoni (void) memset(line, 0, 1024); 543*9bbf5ba1SRafael Vanoni 544*9bbf5ba1SRafael Vanoni while (fgets(line, 1024, file)) { 545*9bbf5ba1SRafael Vanoni if (strstr(line, "cpupm")) { 546*9bbf5ba1SRafael Vanoni if (strstr(line, "enable")) { 547*9bbf5ba1SRafael Vanoni (void) fclose(file); 548*9bbf5ba1SRafael Vanoni return (2); 549*9bbf5ba1SRafael Vanoni } 550*9bbf5ba1SRafael Vanoni } 551*9bbf5ba1SRafael Vanoni if (strstr(line, "poll")) 552*9bbf5ba1SRafael Vanoni ret = 3; 553*9bbf5ba1SRafael Vanoni } 554*9bbf5ba1SRafael Vanoni 555*9bbf5ba1SRafael Vanoni (void) fclose(file); 556*9bbf5ba1SRafael Vanoni 557*9bbf5ba1SRafael Vanoni return (ret); 558*9bbf5ba1SRafael Vanoni } 559*9bbf5ba1SRafael Vanoni 560*9bbf5ba1SRafael Vanoni /* 561b47b5b34SRafael Vanoni * Used as a suggestion, sets PM in /etc/power.conf and 562b47b5b34SRafael Vanoni * a 1sec threshold, then calls /usr/sbin/pmconfig 563b47b5b34SRafael Vanoni */ 564*9bbf5ba1SRafael Vanoni static void 565*9bbf5ba1SRafael Vanoni pt_cpufreq_enable(void) 566b47b5b34SRafael Vanoni { 567b47b5b34SRafael Vanoni (void) system(cpupm_enable); 568b47b5b34SRafael Vanoni (void) system(cpupm_treshold); 569b47b5b34SRafael Vanoni (void) system(default_pmconf); 570b47b5b34SRafael Vanoni 571*9bbf5ba1SRafael Vanoni if (pt_sugg_remove(pt_cpufreq_enable) == 0) 572*9bbf5ba1SRafael Vanoni pt_error("%s : failed to remove a sugg.\n", __FILE__); 573b47b5b34SRafael Vanoni } 574