1b47b5b34SRafael Vanoni /* 2b47b5b34SRafael Vanoni * Copyright 2009, Intel Corporation 3b47b5b34SRafael Vanoni * Copyright 2009, Sun Microsystems, Inc 4b47b5b34SRafael Vanoni * 5b47b5b34SRafael Vanoni * This file is part of PowerTOP 6b47b5b34SRafael Vanoni * 7b47b5b34SRafael Vanoni * This program file is free software; you can redistribute it and/or modify it 8b47b5b34SRafael Vanoni * under the terms of the GNU General Public License as published by the 9b47b5b34SRafael Vanoni * Free Software Foundation; version 2 of the License. 10b47b5b34SRafael Vanoni * 11b47b5b34SRafael Vanoni * This program is distributed in the hope that it will be useful, but WITHOUT 12b47b5b34SRafael Vanoni * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13b47b5b34SRafael Vanoni * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14b47b5b34SRafael Vanoni * for more details. 15b47b5b34SRafael Vanoni * 16b47b5b34SRafael Vanoni * You should have received a copy of the GNU General Public License 17b47b5b34SRafael Vanoni * along with this program in a file named COPYING; if not, write to the 18b47b5b34SRafael Vanoni * Free Software Foundation, Inc., 19b47b5b34SRafael Vanoni * 51 Franklin Street, Fifth Floor, 20b47b5b34SRafael Vanoni * Boston, MA 02110-1301 USA 21b47b5b34SRafael Vanoni * 22b47b5b34SRafael Vanoni * Authors: 23b47b5b34SRafael Vanoni * Arjan van de Ven <arjan@linux.intel.com> 24b47b5b34SRafael Vanoni * Eric C Saxe <eric.saxe@sun.com> 25b47b5b34SRafael Vanoni * Aubrey Li <aubrey.li@intel.com> 26b47b5b34SRafael Vanoni */ 27b47b5b34SRafael Vanoni 28b47b5b34SRafael Vanoni /* 29b47b5b34SRafael Vanoni * GPL Disclaimer 30b47b5b34SRafael Vanoni * 31b47b5b34SRafael Vanoni * For the avoidance of doubt, except that if any license choice other 32b47b5b34SRafael Vanoni * than GPL or LGPL is available it will apply instead, Sun elects to 33b47b5b34SRafael Vanoni * use only the General Public License version 2 (GPLv2) at this time 34b47b5b34SRafael Vanoni * for any software where a choice of GPL license versions is made 35b47b5b34SRafael Vanoni * available with the language indicating that GPLv2 or any later 36b47b5b34SRafael Vanoni * version may be used, or where a choice of which version of the GPL 37b47b5b34SRafael Vanoni * is applied is otherwise unspecified. 38b47b5b34SRafael Vanoni */ 39b47b5b34SRafael Vanoni 40b47b5b34SRafael Vanoni #include <stdlib.h> 41b47b5b34SRafael Vanoni #include <string.h> 42b47b5b34SRafael Vanoni #include <dtrace.h> 43b47b5b34SRafael Vanoni #include <kstat.h> 44b47b5b34SRafael Vanoni #include <errno.h> 45b47b5b34SRafael Vanoni #include "powertop.h" 46b47b5b34SRafael Vanoni 47636423dbSRafael Vanoni #define HZ2MHZ(speed) ((speed) / MICROSEC) 48b47b5b34SRafael Vanoni #define DTP_ARG_COUNT 2 49b47b5b34SRafael Vanoni #define DTP_ARG_LENGTH 5 50b47b5b34SRafael Vanoni 51b47b5b34SRafael Vanoni static uint64_t max_cpufreq = 0; 52b47b5b34SRafael Vanoni static dtrace_hdl_t *dtp; 53b47b5b34SRafael Vanoni static char **dtp_argv; 54b47b5b34SRafael Vanoni 55b47b5b34SRafael Vanoni /* 56b47b5b34SRafael Vanoni * Enabling PM through /etc/power.conf 579bbf5ba1SRafael Vanoni * See pt_cpufreq_suggest() 58b47b5b34SRafael Vanoni */ 59b47b5b34SRafael Vanoni static char default_conf[] = "/etc/power.conf"; 60b47b5b34SRafael Vanoni static char default_pmconf[] = "/usr/sbin/pmconfig"; 61b47b5b34SRafael Vanoni static char cpupm_enable[] = "echo cpupm enable >> /etc/power.conf"; 62b47b5b34SRafael Vanoni static char cpupm_treshold[] = "echo cpu-threshold 1s >> /etc/power.conf"; 63b47b5b34SRafael Vanoni 64b47b5b34SRafael Vanoni /* 65b47b5b34SRafael Vanoni * Buffer containing DTrace program to track CPU frequency transitions 66b47b5b34SRafael Vanoni */ 67b47b5b34SRafael Vanoni static const char *dtp_cpufreq = 68b47b5b34SRafael Vanoni "hrtime_t last[$0];" 69b47b5b34SRafael Vanoni "" 70b47b5b34SRafael Vanoni "BEGIN" 71b47b5b34SRafael Vanoni "{" 72b47b5b34SRafael Vanoni " begin = timestamp;" 73b47b5b34SRafael Vanoni "}" 74b47b5b34SRafael Vanoni "" 75b47b5b34SRafael Vanoni ":::cpu-change-speed" 76b47b5b34SRafael Vanoni "/last[(processorid_t)arg0] != 0/" 77b47b5b34SRafael Vanoni "{" 78b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 79636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 80b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);" 81b47b5b34SRafael Vanoni " last[this->cpu] = timestamp;" 82b47b5b34SRafael Vanoni "}" 83b47b5b34SRafael Vanoni ":::cpu-change-speed" 84b47b5b34SRafael Vanoni "/last[(processorid_t)arg0] == 0/" 85b47b5b34SRafael Vanoni "{" 86b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 87636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 88b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - begin);" 89b47b5b34SRafael Vanoni " last[this->cpu] = timestamp;" 90b47b5b34SRafael Vanoni "}"; 91b47b5b34SRafael Vanoni 92b47b5b34SRafael Vanoni /* 93b47b5b34SRafael Vanoni * Same as above, but only for a specific CPU 94b47b5b34SRafael Vanoni */ 95b47b5b34SRafael Vanoni static const char *dtp_cpufreq_c = 96b47b5b34SRafael Vanoni "hrtime_t last;" 97b47b5b34SRafael Vanoni "" 98b47b5b34SRafael Vanoni "BEGIN" 99b47b5b34SRafael Vanoni "{" 100b47b5b34SRafael Vanoni " begin = timestamp;" 101b47b5b34SRafael Vanoni "}" 102b47b5b34SRafael Vanoni "" 103b47b5b34SRafael Vanoni ":::cpu-change-speed" 104b47b5b34SRafael Vanoni "/(processorid_t)arg0 == $1 &&" 105b47b5b34SRafael Vanoni " last != 0/" 106b47b5b34SRafael Vanoni "{" 107b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 108636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 109b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - last);" 110b47b5b34SRafael Vanoni " last = timestamp;" 111b47b5b34SRafael Vanoni "}" 112b47b5b34SRafael Vanoni ":::cpu-change-speed" 113b47b5b34SRafael Vanoni "/(processorid_t)arg0 == $1 &&" 114b47b5b34SRafael Vanoni " last == 0/" 115b47b5b34SRafael Vanoni "{" 116b47b5b34SRafael Vanoni " this->cpu = (processorid_t)arg0;" 117636423dbSRafael Vanoni " this->oldspeed = (uint64_t)arg1;" 118b47b5b34SRafael Vanoni " @times[this->cpu, this->oldspeed] = sum(timestamp - begin);" 119b47b5b34SRafael Vanoni " last = timestamp;" 120b47b5b34SRafael Vanoni "}"; 121b47b5b34SRafael Vanoni 122b47b5b34SRafael Vanoni static int pt_cpufreq_setup(void); 123b47b5b34SRafael Vanoni static int pt_cpufreq_snapshot(void); 124b47b5b34SRafael Vanoni static int pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *); 125b47b5b34SRafael Vanoni static void pt_cpufreq_stat_account(double, uint_t); 1269bbf5ba1SRafael Vanoni static int pt_cpufreq_snapshot_cpu(kstat_ctl_t *, uint_t); 1279bbf5ba1SRafael Vanoni static int pt_cpufreq_check_pm(void); 1289bbf5ba1SRafael Vanoni static void pt_cpufreq_enable(void); 129b47b5b34SRafael Vanoni 130b47b5b34SRafael Vanoni static int 131b47b5b34SRafael Vanoni pt_cpufreq_setup(void) 132b47b5b34SRafael Vanoni { 133b47b5b34SRafael Vanoni if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL) 134*2d83778aSRafael Vanoni return (1); 135b47b5b34SRafael Vanoni 136b47b5b34SRafael Vanoni if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) { 137b47b5b34SRafael Vanoni free(dtp_argv); 138*2d83778aSRafael Vanoni return (1); 139b47b5b34SRafael Vanoni } 140b47b5b34SRafael Vanoni 141b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed); 142b47b5b34SRafael Vanoni 143636423dbSRafael Vanoni if (PT_ON_CPU) { 144b47b5b34SRafael Vanoni if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH)) 145b47b5b34SRafael Vanoni == NULL) { 146b47b5b34SRafael Vanoni free(dtp_argv[0]); 147b47b5b34SRafael Vanoni free(dtp_argv); 148*2d83778aSRafael Vanoni return (1); 149b47b5b34SRafael Vanoni } 150b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu); 151b47b5b34SRafael Vanoni } 152b47b5b34SRafael Vanoni 153b47b5b34SRafael Vanoni return (0); 154b47b5b34SRafael Vanoni } 155b47b5b34SRafael Vanoni 156b47b5b34SRafael Vanoni /* 157b47b5b34SRafael Vanoni * Perform setup necessary to enumerate and track CPU speed changes 158b47b5b34SRafael Vanoni */ 159b47b5b34SRafael Vanoni int 160b47b5b34SRafael Vanoni pt_cpufreq_stat_prepare(void) 161b47b5b34SRafael Vanoni { 162b47b5b34SRafael Vanoni dtrace_prog_t *prog; 163b47b5b34SRafael Vanoni dtrace_proginfo_t info; 164b47b5b34SRafael Vanoni dtrace_optval_t statustime; 165b47b5b34SRafael Vanoni kstat_ctl_t *kc; 166b47b5b34SRafael Vanoni kstat_t *ksp; 167b47b5b34SRafael Vanoni kstat_named_t *knp; 168b47b5b34SRafael Vanoni freq_state_info_t *state; 169b47b5b34SRafael Vanoni char *s, *token, *prog_ptr; 170b47b5b34SRafael Vanoni int err; 171b47b5b34SRafael Vanoni 172b47b5b34SRafael Vanoni if ((err = pt_cpufreq_setup()) != 0) { 173*2d83778aSRafael Vanoni pt_error("failed to setup %s report (couldn't allocate " 174*2d83778aSRafael Vanoni "memory)\n", g_msg_freq_state); 175b47b5b34SRafael Vanoni return (errno); 176b47b5b34SRafael Vanoni } 177b47b5b34SRafael Vanoni 178b47b5b34SRafael Vanoni state = g_pstate_info; 179b47b5b34SRafael Vanoni if ((g_cpu_power_states = calloc((size_t)g_ncpus, 180b47b5b34SRafael Vanoni sizeof (cpu_power_info_t))) == NULL) 181b47b5b34SRafael Vanoni return (-1); 182b47b5b34SRafael Vanoni 183b47b5b34SRafael Vanoni /* 184b47b5b34SRafael Vanoni * Enumerate the CPU frequencies 185b47b5b34SRafael Vanoni */ 186b47b5b34SRafael Vanoni if ((kc = kstat_open()) == NULL) 187b47b5b34SRafael Vanoni return (errno); 188b47b5b34SRafael Vanoni 189b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL); 190b47b5b34SRafael Vanoni 191b47b5b34SRafael Vanoni if (ksp == NULL) { 192b47b5b34SRafael Vanoni err = errno; 193b47b5b34SRafael Vanoni (void) kstat_close(kc); 194b47b5b34SRafael Vanoni return (err); 195b47b5b34SRafael Vanoni } 196b47b5b34SRafael Vanoni 197b47b5b34SRafael Vanoni (void) kstat_read(kc, ksp, NULL); 198b47b5b34SRafael Vanoni 199b47b5b34SRafael Vanoni knp = kstat_data_lookup(ksp, "supported_frequencies_Hz"); 200b47b5b34SRafael Vanoni s = knp->value.str.addr.ptr; 201b47b5b34SRafael Vanoni 202b47b5b34SRafael Vanoni g_npstates = 0; 203b47b5b34SRafael Vanoni 204b47b5b34SRafael Vanoni for (token = strtok(s, ":"), s = NULL; 205*2d83778aSRafael Vanoni token != NULL && g_npstates < NSTATES; 206b47b5b34SRafael Vanoni token = strtok(NULL, ":")) { 207b47b5b34SRafael Vanoni 208b47b5b34SRafael Vanoni state->speed = HZ2MHZ(atoll(token)); 209b47b5b34SRafael Vanoni 210b47b5b34SRafael Vanoni if (state->speed > max_cpufreq) 211b47b5b34SRafael Vanoni max_cpufreq = state->speed; 212b47b5b34SRafael Vanoni 213b47b5b34SRafael Vanoni state->total_time = (uint64_t)0; 214b47b5b34SRafael Vanoni 215b47b5b34SRafael Vanoni g_npstates++; 216b47b5b34SRafael Vanoni state++; 217b47b5b34SRafael Vanoni } 218b47b5b34SRafael Vanoni 219b47b5b34SRafael Vanoni if (token != NULL) 220*2d83778aSRafael Vanoni pt_error("CPU exceeds the supported number of %s\n", 221*2d83778aSRafael Vanoni g_msg_freq_state); 222b47b5b34SRafael Vanoni 223b47b5b34SRafael Vanoni (void) kstat_close(kc); 224b47b5b34SRafael Vanoni 225b47b5b34SRafael Vanoni /* 226b47b5b34SRafael Vanoni * Return if speed transition is not supported 227b47b5b34SRafael Vanoni */ 228b47b5b34SRafael Vanoni if (g_npstates < 2) 229b47b5b34SRafael Vanoni return (-1); 230b47b5b34SRafael Vanoni 231b47b5b34SRafael Vanoni /* 232b47b5b34SRafael Vanoni * Setup DTrace to look for CPU frequency changes 233b47b5b34SRafael Vanoni */ 234b47b5b34SRafael Vanoni if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) { 235*2d83778aSRafael Vanoni pt_error("cannot open dtrace library for the %s report: %s\n", 236*2d83778aSRafael Vanoni g_msg_freq_state, dtrace_errmsg(NULL, err)); 237b47b5b34SRafael Vanoni return (-2); 238b47b5b34SRafael Vanoni } 239b47b5b34SRafael Vanoni 240b47b5b34SRafael Vanoni /* 241b47b5b34SRafael Vanoni * Execute different scripts (defined above) depending on 242b47b5b34SRafael Vanoni * user specified options. Default mode uses dtp_cpufreq. 243b47b5b34SRafael Vanoni */ 244636423dbSRafael Vanoni if (PT_ON_CPU) 245b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq_c; 246b47b5b34SRafael Vanoni else 247b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq; 248b47b5b34SRafael Vanoni 249b47b5b34SRafael Vanoni if ((prog = dtrace_program_strcompile(dtp, prog_ptr, 250b47b5b34SRafael Vanoni DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) { 251*2d83778aSRafael Vanoni pt_error("failed to compile %s program\n", g_msg_freq_state); 252b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 253b47b5b34SRafael Vanoni } 254b47b5b34SRafael Vanoni 255b47b5b34SRafael Vanoni if (dtrace_program_exec(dtp, prog, &info) == -1) { 256*2d83778aSRafael Vanoni pt_error("failed to enable %s probes\n", g_msg_freq_state); 257b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 258b47b5b34SRafael Vanoni } 259b47b5b34SRafael Vanoni 260*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggsize", "128k") == -1) 261*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggsize'\n", g_msg_freq_state); 262b47b5b34SRafael Vanoni 263*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggrate", "0") == -1) 264*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggrate'\n", g_msg_freq_state); 265b47b5b34SRafael Vanoni 266*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggpercpu", 0) == -1) 267*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggpercpu'\n", g_msg_freq_state); 268b47b5b34SRafael Vanoni 269b47b5b34SRafael Vanoni if (dtrace_go(dtp) != 0) { 270*2d83778aSRafael Vanoni pt_error("failed to start %s observation\n", g_msg_freq_state); 271b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 272b47b5b34SRafael Vanoni } 273b47b5b34SRafael Vanoni 274b47b5b34SRafael Vanoni if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) { 275*2d83778aSRafael Vanoni pt_error("failed to get %s 'statusrate'\n", g_msg_freq_state); 276b47b5b34SRafael Vanoni return (dtrace_errno(dtp)); 277b47b5b34SRafael Vanoni } 278b47b5b34SRafael Vanoni 279b47b5b34SRafael Vanoni return (0); 280b47b5b34SRafael Vanoni } 281b47b5b34SRafael Vanoni 282b47b5b34SRafael Vanoni /* 283b47b5b34SRafael Vanoni * The DTrace probes have already been enabled, and are tracking 284b47b5b34SRafael Vanoni * CPU speed transitions. Take a snapshot of the aggregations, and 285b47b5b34SRafael Vanoni * look for any CPUs that have made a speed transition over the last 286b47b5b34SRafael Vanoni * sampling interval. Note that the aggregations may be empty if no 287b47b5b34SRafael Vanoni * speed transitions took place over the last interval. In that case, 288b47b5b34SRafael Vanoni * notate that we have already accounted for the time, so that when 289b47b5b34SRafael Vanoni * we do encounter a speed transition in a future sampling interval 290b47b5b34SRafael Vanoni * we can subtract that time back out. 291b47b5b34SRafael Vanoni */ 292b47b5b34SRafael Vanoni int 293b47b5b34SRafael Vanoni pt_cpufreq_stat_collect(double interval) 294b47b5b34SRafael Vanoni { 295b47b5b34SRafael Vanoni int i, ret; 296b47b5b34SRafael Vanoni 297b47b5b34SRafael Vanoni /* 298b47b5b34SRafael Vanoni * Zero out the interval time reported by DTrace for 299b47b5b34SRafael Vanoni * this interval 300b47b5b34SRafael Vanoni */ 301b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) 302b47b5b34SRafael Vanoni g_pstate_info[i].total_time = 0; 303b47b5b34SRafael Vanoni 304b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus; i++) 305b47b5b34SRafael Vanoni g_cpu_power_states[i].dtrace_time = 0; 306b47b5b34SRafael Vanoni 307b47b5b34SRafael Vanoni if (dtrace_status(dtp) == -1) 308b47b5b34SRafael Vanoni return (-1); 309b47b5b34SRafael Vanoni 310b47b5b34SRafael Vanoni if (dtrace_aggregate_snap(dtp) != 0) 311*2d83778aSRafael Vanoni pt_error("failed to collect data for %s\n", g_msg_freq_state); 312b47b5b34SRafael Vanoni 313b47b5b34SRafael Vanoni if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk, 314b47b5b34SRafael Vanoni NULL) != 0) 315*2d83778aSRafael Vanoni pt_error("failed to sort data for %s\n", g_msg_freq_state); 316b47b5b34SRafael Vanoni 317b47b5b34SRafael Vanoni dtrace_aggregate_clear(dtp); 318b47b5b34SRafael Vanoni 319b47b5b34SRafael Vanoni if ((ret = pt_cpufreq_snapshot()) != 0) { 320*2d83778aSRafael Vanoni pt_error("failed to snapshot %s state\n", g_msg_freq_state); 321b47b5b34SRafael Vanoni return (ret); 322b47b5b34SRafael Vanoni } 323b47b5b34SRafael Vanoni 324b47b5b34SRafael Vanoni switch (g_op_mode) { 325636423dbSRafael Vanoni case PT_MODE_CPU: 326b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, g_observed_cpu); 327b47b5b34SRafael Vanoni break; 328636423dbSRafael Vanoni case PT_MODE_DEFAULT: 329b47b5b34SRafael Vanoni default: 330b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++) 331b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, i); 332b47b5b34SRafael Vanoni break; 333b47b5b34SRafael Vanoni } 334b47b5b34SRafael Vanoni 335b47b5b34SRafael Vanoni return (0); 336b47b5b34SRafael Vanoni } 337b47b5b34SRafael Vanoni 338b47b5b34SRafael Vanoni static void 339b47b5b34SRafael Vanoni pt_cpufreq_stat_account(double interval, uint_t cpu) 340b47b5b34SRafael Vanoni { 341636423dbSRafael Vanoni cpu_power_info_t *cpu_pow; 342b47b5b34SRafael Vanoni uint64_t speed; 343b47b5b34SRafael Vanoni hrtime_t duration; 344b47b5b34SRafael Vanoni int i; 345b47b5b34SRafael Vanoni 346b47b5b34SRafael Vanoni cpu_pow = &g_cpu_power_states[cpu]; 347b47b5b34SRafael Vanoni speed = cpu_pow->current_pstate; 348b47b5b34SRafael Vanoni 349636423dbSRafael Vanoni duration = (hrtime_t)(interval * NANOSEC) - cpu_pow->dtrace_time; 350636423dbSRafael Vanoni 351636423dbSRafael Vanoni /* 352636423dbSRafael Vanoni * 'duration' may be a negative value when we're using or forcing a 353636423dbSRafael Vanoni * small interval, and the amount of time already accounted ends up 354636423dbSRafael Vanoni * being larger than the the former. 355636423dbSRafael Vanoni */ 356636423dbSRafael Vanoni if (duration < 0) 357636423dbSRafael Vanoni return; 358b47b5b34SRafael Vanoni 359b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) { 360b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) { 361b47b5b34SRafael Vanoni g_pstate_info[i].total_time += duration; 362b47b5b34SRafael Vanoni cpu_pow->time_accounted += duration; 363636423dbSRafael Vanoni cpu_pow->speed_accounted = speed; 364b47b5b34SRafael Vanoni } 365b47b5b34SRafael Vanoni } 366b47b5b34SRafael Vanoni } 367b47b5b34SRafael Vanoni 368b47b5b34SRafael Vanoni /* 369b47b5b34SRafael Vanoni * Take a snapshot of each CPU's speed by looking through the cpu_info kstats. 370b47b5b34SRafael Vanoni */ 371b47b5b34SRafael Vanoni static int 372b47b5b34SRafael Vanoni pt_cpufreq_snapshot(void) 373b47b5b34SRafael Vanoni { 374b47b5b34SRafael Vanoni kstat_ctl_t *kc; 375b47b5b34SRafael Vanoni int ret; 376b47b5b34SRafael Vanoni uint_t i; 377b47b5b34SRafael Vanoni 378b47b5b34SRafael Vanoni if ((kc = kstat_open()) == NULL) 379b47b5b34SRafael Vanoni return (errno); 380b47b5b34SRafael Vanoni 381b47b5b34SRafael Vanoni switch (g_op_mode) { 382636423dbSRafael Vanoni case PT_MODE_CPU: 383b47b5b34SRafael Vanoni ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu); 384b47b5b34SRafael Vanoni break; 385636423dbSRafael Vanoni case PT_MODE_DEFAULT: 386b47b5b34SRafael Vanoni default: 387b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++) 388b47b5b34SRafael Vanoni if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0) 389b47b5b34SRafael Vanoni break; 390b47b5b34SRafael Vanoni break; 391b47b5b34SRafael Vanoni } 392b47b5b34SRafael Vanoni 393b47b5b34SRafael Vanoni if (kstat_close(kc) != 0) 394*2d83778aSRafael Vanoni pt_error("couldn't close %s kstat\n", g_msg_freq_state); 395b47b5b34SRafael Vanoni 396b47b5b34SRafael Vanoni return (ret); 397b47b5b34SRafael Vanoni } 398b47b5b34SRafael Vanoni 399b47b5b34SRafael Vanoni static int 400b47b5b34SRafael Vanoni pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu) 401b47b5b34SRafael Vanoni { 402b47b5b34SRafael Vanoni kstat_t *ksp; 403b47b5b34SRafael Vanoni kstat_named_t *knp; 404b47b5b34SRafael Vanoni 405b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL); 406b47b5b34SRafael Vanoni if (ksp == NULL) { 407*2d83778aSRafael Vanoni pt_error("couldn't find 'cpu_info' kstat for CPU %d\n while " 408*2d83778aSRafael Vanoni "taking a snapshot of %s\n", cpu, g_msg_freq_state); 409b47b5b34SRafael Vanoni return (1); 410b47b5b34SRafael Vanoni } 411b47b5b34SRafael Vanoni 412b47b5b34SRafael Vanoni if (kstat_read(kc, ksp, NULL) == -1) { 413*2d83778aSRafael Vanoni pt_error("couldn't read 'cpu_info' kstat for CPU %d\n while " 414*2d83778aSRafael Vanoni "taking a snapshot of %s\n", cpu, g_msg_freq_state); 415b47b5b34SRafael Vanoni return (2); 416b47b5b34SRafael Vanoni } 417b47b5b34SRafael Vanoni 418b47b5b34SRafael Vanoni knp = kstat_data_lookup(ksp, "current_clock_Hz"); 419b47b5b34SRafael Vanoni if (knp == NULL) { 420*2d83778aSRafael Vanoni pt_error("couldn't find 'current_clock_Hz' kstat for CPU %d " 421*2d83778aSRafael Vanoni "while taking a snapshot of %s\n", cpu, g_msg_freq_state); 422b47b5b34SRafael Vanoni return (3); 423b47b5b34SRafael Vanoni } 424b47b5b34SRafael Vanoni 425b47b5b34SRafael Vanoni g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64); 426b47b5b34SRafael Vanoni 427b47b5b34SRafael Vanoni return (0); 428b47b5b34SRafael Vanoni } 429b47b5b34SRafael Vanoni 430b47b5b34SRafael Vanoni /* 431b47b5b34SRafael Vanoni * DTrace aggregation walker that sorts through a snapshot of the 432b47b5b34SRafael Vanoni * aggregation data collected during firings of the cpu-change-speed 433b47b5b34SRafael Vanoni * probe. 434b47b5b34SRafael Vanoni */ 435b47b5b34SRafael Vanoni /*ARGSUSED*/ 436b47b5b34SRafael Vanoni static int 437b47b5b34SRafael Vanoni pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg) 438b47b5b34SRafael Vanoni { 439b47b5b34SRafael Vanoni dtrace_aggdesc_t *aggdesc = data->dtada_desc; 440b47b5b34SRafael Vanoni dtrace_recdesc_t *cpu_rec, *speed_rec; 441636423dbSRafael Vanoni cpu_power_info_t *cp; 442b47b5b34SRafael Vanoni int32_t cpu; 443b47b5b34SRafael Vanoni uint64_t speed; 444636423dbSRafael Vanoni hrtime_t res; 445b47b5b34SRafael Vanoni int i; 446b47b5b34SRafael Vanoni 447b47b5b34SRafael Vanoni if (strcmp(aggdesc->dtagd_name, "times") == 0) { 448b47b5b34SRafael Vanoni cpu_rec = &aggdesc->dtagd_rec[1]; 449b47b5b34SRafael Vanoni speed_rec = &aggdesc->dtagd_rec[2]; 450b47b5b34SRafael Vanoni 451b47b5b34SRafael Vanoni /* LINTED - alignment */ 452b47b5b34SRafael Vanoni cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset); 453636423dbSRafael Vanoni 454636423dbSRafael Vanoni /* LINTED - alignment */ 455636423dbSRafael Vanoni res = *((hrtime_t *)(data->dtada_percpu[cpu])); 456636423dbSRafael Vanoni 457b47b5b34SRafael Vanoni /* LINTED - alignment */ 458b47b5b34SRafael Vanoni speed = *(uint64_t *)(data->dtada_data + 459b47b5b34SRafael Vanoni speed_rec->dtrd_offset); 460b47b5b34SRafael Vanoni 461636423dbSRafael Vanoni if (speed == 0) 462b47b5b34SRafael Vanoni speed = max_cpufreq; 463636423dbSRafael Vanoni else 464636423dbSRafael Vanoni speed = HZ2MHZ(speed); 465b47b5b34SRafael Vanoni 466b47b5b34SRafael Vanoni /* 467b47b5b34SRafael Vanoni * We have an aggregation record for "cpu" being at "speed" 468b47b5b34SRafael Vanoni * for an interval of "n" nanoseconds. The reported interval 469b47b5b34SRafael Vanoni * may exceed the powertop sampling interval, since we only 470b47b5b34SRafael Vanoni * notice during potentially infrequent firings of the 471b47b5b34SRafael Vanoni * "speed change" DTrace probe. In this case powertop would 472b47b5b34SRafael Vanoni * have already accounted for the portions of the interval 473b47b5b34SRafael Vanoni * that happened during prior powertop samplings, so subtract 474b47b5b34SRafael Vanoni * out time already accounted. 475b47b5b34SRafael Vanoni */ 476636423dbSRafael Vanoni cp = &g_cpu_power_states[cpu]; 477b47b5b34SRafael Vanoni 478b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) { 479b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) { 480636423dbSRafael Vanoni 481636423dbSRafael Vanoni if (cp->time_accounted > 0 && 482636423dbSRafael Vanoni cp->speed_accounted == speed) { 483636423dbSRafael Vanoni if (res > cp->time_accounted) { 484636423dbSRafael Vanoni res -= cp->time_accounted; 485636423dbSRafael Vanoni cp->time_accounted = 0; 486636423dbSRafael Vanoni cp->speed_accounted = 0; 487636423dbSRafael Vanoni } else { 488636423dbSRafael Vanoni return (DTRACE_AGGWALK_NEXT); 489b47b5b34SRafael Vanoni } 490b47b5b34SRafael Vanoni } 491636423dbSRafael Vanoni 492636423dbSRafael Vanoni g_pstate_info[i].total_time += res; 493636423dbSRafael Vanoni cp->dtrace_time += res; 494b47b5b34SRafael Vanoni } 495b47b5b34SRafael Vanoni } 496b47b5b34SRafael Vanoni } 497636423dbSRafael Vanoni 498b47b5b34SRafael Vanoni return (DTRACE_AGGWALK_NEXT); 499b47b5b34SRafael Vanoni } 500b47b5b34SRafael Vanoni 501b47b5b34SRafael Vanoni /* 5029bbf5ba1SRafael Vanoni * Checks if PM is enabled in /etc/power.conf, enabling if not 5039bbf5ba1SRafael Vanoni */ 5049bbf5ba1SRafael Vanoni void 5059bbf5ba1SRafael Vanoni pt_cpufreq_suggest(void) 5069bbf5ba1SRafael Vanoni { 5079bbf5ba1SRafael Vanoni int ret = pt_cpufreq_check_pm(); 5089bbf5ba1SRafael Vanoni 5099bbf5ba1SRafael Vanoni switch (ret) { 5109bbf5ba1SRafael Vanoni case 0: 5119bbf5ba1SRafael Vanoni pt_sugg_add("Suggestion: enable CPU power management by " 5129bbf5ba1SRafael Vanoni "pressing the P key", 40, 'P', (char *)g_msg_freq_enable, 5139bbf5ba1SRafael Vanoni pt_cpufreq_enable); 5149bbf5ba1SRafael Vanoni break; 5159bbf5ba1SRafael Vanoni } 5169bbf5ba1SRafael Vanoni } 5179bbf5ba1SRafael Vanoni 5189bbf5ba1SRafael Vanoni /* 5199bbf5ba1SRafael Vanoni * Checks /etc/power.conf and returns: 5209bbf5ba1SRafael Vanoni * 5219bbf5ba1SRafael Vanoni * 0 if CPUPM is not enabled 5229bbf5ba1SRafael Vanoni * 1 if there's nothing for us to do because: 5239bbf5ba1SRafael Vanoni * (a) the system does not support frequency scaling 5249bbf5ba1SRafael Vanoni * (b) there's no power.conf. 5259bbf5ba1SRafael Vanoni * 2 if CPUPM is enabled 5269bbf5ba1SRafael Vanoni * 3 if the system is running in poll-mode, as opposed to event-mode 5279bbf5ba1SRafael Vanoni * 5289bbf5ba1SRafael Vanoni * Notice the ordering of the return values, they will be picked up and 5299bbf5ba1SRafael Vanoni * switched upon ascendingly. 5309bbf5ba1SRafael Vanoni */ 5319bbf5ba1SRafael Vanoni static int 5329bbf5ba1SRafael Vanoni pt_cpufreq_check_pm(void) 5339bbf5ba1SRafael Vanoni { 5349bbf5ba1SRafael Vanoni char line[1024]; 5359bbf5ba1SRafael Vanoni FILE *file; 5369bbf5ba1SRafael Vanoni int ret = 0; 5379bbf5ba1SRafael Vanoni 5389bbf5ba1SRafael Vanoni if (g_npstates < 2 || (file = fopen(default_conf, "r")) == NULL) 5399bbf5ba1SRafael Vanoni return (1); 5409bbf5ba1SRafael Vanoni 5419bbf5ba1SRafael Vanoni (void) memset(line, 0, 1024); 5429bbf5ba1SRafael Vanoni 5439bbf5ba1SRafael Vanoni while (fgets(line, 1024, file)) { 5449bbf5ba1SRafael Vanoni if (strstr(line, "cpupm")) { 5459bbf5ba1SRafael Vanoni if (strstr(line, "enable")) { 5469bbf5ba1SRafael Vanoni (void) fclose(file); 5479bbf5ba1SRafael Vanoni return (2); 5489bbf5ba1SRafael Vanoni } 5499bbf5ba1SRafael Vanoni } 5509bbf5ba1SRafael Vanoni if (strstr(line, "poll")) 5519bbf5ba1SRafael Vanoni ret = 3; 5529bbf5ba1SRafael Vanoni } 5539bbf5ba1SRafael Vanoni 5549bbf5ba1SRafael Vanoni (void) fclose(file); 5559bbf5ba1SRafael Vanoni 5569bbf5ba1SRafael Vanoni return (ret); 5579bbf5ba1SRafael Vanoni } 5589bbf5ba1SRafael Vanoni 5599bbf5ba1SRafael Vanoni /* 560b47b5b34SRafael Vanoni * Used as a suggestion, sets PM in /etc/power.conf and 561b47b5b34SRafael Vanoni * a 1sec threshold, then calls /usr/sbin/pmconfig 562b47b5b34SRafael Vanoni */ 5639bbf5ba1SRafael Vanoni static void 5649bbf5ba1SRafael Vanoni pt_cpufreq_enable(void) 565b47b5b34SRafael Vanoni { 566b47b5b34SRafael Vanoni (void) system(cpupm_enable); 567b47b5b34SRafael Vanoni (void) system(cpupm_treshold); 568b47b5b34SRafael Vanoni (void) system(default_pmconf); 569b47b5b34SRafael Vanoni 5709bbf5ba1SRafael Vanoni if (pt_sugg_remove(pt_cpufreq_enable) == 0) 571*2d83778aSRafael Vanoni pt_error("failed to remove a %s suggestion\n", 572*2d83778aSRafael Vanoni g_msg_freq_state); 573b47b5b34SRafael Vanoni } 574