1 /* 2 * Copyright 2009, Intel Corporation 3 * Copyright 2009, Sun Microsystems, Inc 4 * 5 * This file is part of PowerTOP 6 * 7 * This program file is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License as published by the 9 * Free Software Foundation; version 2 of the License. 10 * 11 * This program is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program in a file named COPYING; if not, write to the 18 * Free Software Foundation, Inc., 19 * 51 Franklin Street, Fifth Floor, 20 * Boston, MA 02110-1301 USA 21 * 22 * Authors: 23 * Arjan van de Ven <arjan@linux.intel.com> 24 * Eric C Saxe <eric.saxe@sun.com> 25 * Aubrey Li <aubrey.li@intel.com> 26 */ 27 28 /* 29 * GPL Disclaimer 30 * 31 * For the avoidance of doubt, except that if any license choice other 32 * than GPL or LGPL is available it will apply instead, Sun elects to 33 * use only the General Public License version 2 (GPLv2) at this time 34 * for any software where a choice of GPL license versions is made 35 * available with the language indicating that GPLv2 or any later 36 * version may be used, or where a choice of which version of the GPL 37 * is applied is otherwise unspecified. 38 */ 39 40 #include <string.h> 41 #include <dtrace.h> 42 #include "powertop.h" 43 44 #define S2NS(x) ((x) * (NANOSEC)) 45 46 static dtrace_hdl_t *dtp; 47 48 /* 49 * Buffer containing DTrace program to track CPU idle state transitions 50 */ 51 static const char *dtp_cpuidle = 52 ":::idle-state-transition" 53 "/arg0 != 0/" 54 "{" 55 " self->start = timestamp;" 56 " self->state = arg0;" 57 "}" 58 "" 59 ":::idle-state-transition" 60 "/arg0 == 0 && self->start/" 61 "{" 62 " @number[self->state] = count();" 63 " @times[self->state] = sum(timestamp - self->start);" 64 " self->start = 0;" 65 " self->state = 0;" 66 "}"; 67 68 /* 69 * Same as above but only for a specific CPU 70 */ 71 static const char *dtp_cpuidle_c = 72 ":::idle-state-transition" 73 "/cpu == $0 &&" 74 " arg0 != 0/" 75 "{" 76 " self->start = timestamp;" 77 " self->state = arg0;" 78 "}" 79 "" 80 ":::idle-state-transition" 81 "/cpu == $0 &&" 82 " arg0 == 0 && self->start/" 83 "{" 84 " @number[self->state] = count();" 85 " @times[self->state] = sum(timestamp - self->start);" 86 " self->start = 0;" 87 " self->state = 0;" 88 "}"; 89 90 static int pt_cpuidle_dtrace_walk(const dtrace_aggdata_t *, void *); 91 92 /* 93 * Perform setup necessary to track CPU idle state transitions 94 */ 95 int 96 pt_cpuidle_stat_prepare(void) 97 { 98 dtrace_prog_t *prog; 99 dtrace_proginfo_t info; 100 dtrace_optval_t statustime; 101 int err; 102 char *prog_ptr; 103 104 if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) { 105 pt_error("cannot open dtrace library for the %s report: %s\n", 106 g_msg_idle_state, dtrace_errmsg(NULL, err)); 107 return (-1); 108 } 109 110 /* 111 * Execute different scripts (defined above) depending on 112 * user specified options. 113 */ 114 if (PT_ON_CPU) 115 prog_ptr = (char *)dtp_cpuidle_c; 116 else 117 prog_ptr = (char *)dtp_cpuidle; 118 119 if ((prog = dtrace_program_strcompile(dtp, prog_ptr, 120 DTRACE_PROBESPEC_NAME, 0, g_argc, g_argv)) == NULL) { 121 pt_error("failed to compile %s program\n", g_msg_idle_state); 122 return (dtrace_errno(dtp)); 123 } 124 125 if (dtrace_program_exec(dtp, prog, &info) == -1) { 126 pt_error("failed to enable %s probes\n", g_msg_idle_state); 127 return (dtrace_errno(dtp)); 128 } 129 130 if (dtrace_setopt(dtp, "aggsize", "128k") == -1) 131 pt_error("failed to set %s 'aggsize'\n", g_msg_idle_state); 132 133 if (dtrace_setopt(dtp, "aggrate", "0") == -1) 134 pt_error("failed to set %s 'aggrate'\n", g_msg_idle_state); 135 136 if (dtrace_setopt(dtp, "aggpercpu", 0) == -1) 137 pt_error("failed to set %s 'aggpercpu'\n", g_msg_idle_state); 138 139 if (dtrace_go(dtp) != 0) { 140 pt_error("failed to start %s observation\n", g_msg_idle_state); 141 return (dtrace_errno(dtp)); 142 } 143 144 if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) { 145 pt_error("failed to get %s 'statusrate'\n", g_msg_idle_state); 146 return (dtrace_errno(dtp)); 147 } 148 149 return (0); 150 } 151 152 /* 153 * The DTrace probes have been enabled, and are tracking CPU idle state 154 * transitions. Take a snapshot of the aggregations, and invoke the aggregation 155 * walker to process any records. The walker does most of the accounting work 156 * chalking up time spent into the g_cstate_info structure. 157 */ 158 int 159 pt_cpuidle_stat_collect(double interval) 160 { 161 int i; 162 hrtime_t t = 0; 163 164 /* 165 * Assume that all the time spent in this interval will 166 * be the default "0" state. The DTrace walker will reallocate 167 * time out of the default bucket as it processes aggregation 168 * records for time spent in other states. 169 */ 170 g_cstate_info[0].total_time = (uint64_t)S2NS(interval * 171 g_ncpus_observed); 172 173 if (dtrace_status(dtp) == -1) 174 return (-1); 175 176 if (dtrace_aggregate_snap(dtp) != 0) 177 pt_error("failed to collect data for %s\n", g_msg_idle_state); 178 179 if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpuidle_dtrace_walk, 180 NULL) != 0) 181 pt_error("failed to sort %s data\n", g_msg_idle_state); 182 183 dtrace_aggregate_clear(dtp); 184 185 /* 186 * Populate g_cstate_info with the correct amount of time spent 187 * in each C state and update the number of C states in g_max_cstate 188 */ 189 g_total_c_time = 0; 190 for (i = 0; i < NSTATES; i++) { 191 if (g_cstate_info[i].total_time > 0) { 192 g_total_c_time += g_cstate_info[i].total_time; 193 if (i > g_max_cstate) 194 g_max_cstate = i; 195 if (g_cstate_info[i].last_time > t) { 196 t = g_cstate_info[i].last_time; 197 g_longest_cstate = i; 198 } 199 } 200 } 201 202 return (0); 203 } 204 205 /* 206 * DTrace aggregation walker that sorts through a snapshot of data records 207 * collected during firings of the idle-state-transition probe. 208 * 209 * XXX A way of querying the current idle state for a CPU is needed in addition 210 * to logic similar to that in cpufreq.c 211 */ 212 /*ARGSUSED*/ 213 static int 214 pt_cpuidle_dtrace_walk(const dtrace_aggdata_t *data, void *arg) 215 { 216 dtrace_aggdesc_t *aggdesc = data->dtada_desc; 217 dtrace_recdesc_t *rec; 218 uint64_t n = 0, state; 219 int i; 220 221 rec = &aggdesc->dtagd_rec[1]; 222 223 switch (g_bit_depth) { 224 case 32: 225 /* LINTED - alignment */ 226 state = *(uint32_t *)(data->dtada_data + 227 rec->dtrd_offset); 228 break; 229 case 64: 230 /* LINTED - alignment */ 231 state = *(uint64_t *)(data->dtada_data + 232 rec->dtrd_offset); 233 break; 234 } 235 236 if (strcmp(aggdesc->dtagd_name, "number") == 0) { 237 for (i = 0; i < g_ncpus; i++) { 238 /* LINTED - alignment */ 239 n += *((uint64_t *)(data->dtada_percpu[i])); 240 } 241 g_total_events += n; 242 g_cstate_info[state].events += n; 243 } 244 else 245 if (strcmp(aggdesc->dtagd_name, "times") == 0) { 246 for (i = 0; i < g_ncpus; i++) { 247 /* LINTED - alignment */ 248 n += *((uint64_t *)(data->dtada_percpu[i])); 249 } 250 g_cstate_info[state].last_time = n; 251 g_cstate_info[state].total_time += n; 252 if (g_cstate_info[0].total_time >= n) 253 g_cstate_info[0].total_time -= n; 254 else 255 g_cstate_info[0].total_time = 0; 256 } 257 258 return (DTRACE_AGGWALK_NEXT); 259 } 260