1 /*
2 * Copyright 2009, Intel Corporation
3 * Copyright 2009, Sun Microsystems, Inc
4 *
5 * This file is part of PowerTOP
6 *
7 * This program file is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; version 2 of the License.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program in a file named COPYING; if not, write to the
18 * Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301 USA
21 *
22 * Authors:
23 * Arjan van de Ven <arjan@linux.intel.com>
24 * Eric C Saxe <eric.saxe@sun.com>
25 * Aubrey Li <aubrey.li@intel.com>
26 */
27
28 /*
29 * GPL Disclaimer
30 *
31 * For the avoidance of doubt, except that if any license choice other
32 * than GPL or LGPL is available it will apply instead, Sun elects to
33 * use only the General Public License version 2 (GPLv2) at this time
34 * for any software where a choice of GPL license versions is made
35 * available with the language indicating that GPLv2 or any later
36 * version may be used, or where a choice of which version of the GPL
37 * is applied is otherwise unspecified.
38 */
39
40 #include <string.h>
41 #include <dtrace.h>
42 #include "powertop.h"
43
44 #define S2NS(x) ((x) * (NANOSEC))
45
46 static dtrace_hdl_t *dtp;
47
/*
 * D program used to track CPU idle state transitions on all CPUs.
 *
 * Both clauses enable the idle-state-transition probe:
 *
 *  - When arg0 is non-zero, the current timestamp and arg0 are saved in the
 *    thread-local variables self->start and self->state.
 *
 *  - When arg0 is zero and self->start has been set, the "number"
 *    aggregation counts the transition and the "times" aggregation sums the
 *    elapsed time, both keyed by the saved state.  The thread-local
 *    variables are then cleared.
 */
static const char *dtp_cpuidle =
	/* Clause 1: remember when, and into which state, we transitioned */
	":::idle-state-transition/arg0 != 0/"
	"{"
	" self->start = timestamp;"
	" self->state = arg0;"
	"}"
	/* Clause 2: account for the time spent in the remembered state */
	":::idle-state-transition/arg0 == 0 && self->start/"
	"{"
	" @number[self->state] = count();"
	" @times[self->state] = sum(timestamp - self->start);"
	" self->start = 0;"
	" self->state = 0;"
	"}";
67
/*
 * D program used to track CPU idle state transitions on a single CPU.
 *
 * The clauses are the same as in the all-CPU program, except that each
 * predicate additionally requires "cpu == $0".  $0 is a D macro argument;
 * pt_cpuidle_stat_prepare() passes g_argc/g_argv to the compiler to supply it.
 */
static const char *dtp_cpuidle_c =
	/* Clause 1: remember when, and into which state, we transitioned */
	":::idle-state-transition/cpu == $0 && arg0 != 0/"
	"{"
	" self->start = timestamp;"
	" self->state = arg0;"
	"}"
	/* Clause 2: account for the time spent in the remembered state */
	":::idle-state-transition/cpu == $0 && arg0 == 0 && self->start/"
	"{"
	" @number[self->state] = count();"
	" @times[self->state] = sum(timestamp - self->start);"
	" self->start = 0;"
	" self->state = 0;"
	"}";
89
90 static int pt_cpuidle_dtrace_walk(const dtrace_aggdata_t *, void *);
91
92 /*
93 * Perform setup necessary to track CPU idle state transitions
94 */
95 int
pt_cpuidle_stat_prepare(void)96 pt_cpuidle_stat_prepare(void)
97 {
98 dtrace_prog_t *prog;
99 dtrace_proginfo_t info;
100 dtrace_optval_t statustime;
101 int err;
102 char *prog_ptr;
103
104 if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
105 pt_error("cannot open dtrace library for the %s report: %s\n",
106 g_msg_idle_state, dtrace_errmsg(NULL, err));
107 return (-1);
108 }
109
110 /*
111 * Execute different scripts (defined above) depending on
112 * user specified options.
113 */
114 if (PT_ON_CPU)
115 prog_ptr = (char *)dtp_cpuidle_c;
116 else
117 prog_ptr = (char *)dtp_cpuidle;
118
119 if ((prog = dtrace_program_strcompile(dtp, prog_ptr,
120 DTRACE_PROBESPEC_NAME, 0, g_argc, g_argv)) == NULL) {
121 pt_error("failed to compile %s program\n", g_msg_idle_state);
122 return (dtrace_errno(dtp));
123 }
124
125 if (dtrace_program_exec(dtp, prog, &info) == -1) {
126 pt_error("failed to enable %s probes\n", g_msg_idle_state);
127 return (dtrace_errno(dtp));
128 }
129
130 if (dtrace_setopt(dtp, "aggsize", "128k") == -1)
131 pt_error("failed to set %s 'aggsize'\n", g_msg_idle_state);
132
133 if (dtrace_setopt(dtp, "aggrate", "0") == -1)
134 pt_error("failed to set %s 'aggrate'\n", g_msg_idle_state);
135
136 if (dtrace_setopt(dtp, "aggpercpu", 0) == -1)
137 pt_error("failed to set %s 'aggpercpu'\n", g_msg_idle_state);
138
139 if (dtrace_go(dtp) != 0) {
140 pt_error("failed to start %s observation\n", g_msg_idle_state);
141 return (dtrace_errno(dtp));
142 }
143
144 if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) {
145 pt_error("failed to get %s 'statusrate'\n", g_msg_idle_state);
146 return (dtrace_errno(dtp));
147 }
148
149 return (0);
150 }
151
152 /*
153 * The DTrace probes have been enabled, and are tracking CPU idle state
154 * transitions. Take a snapshot of the aggregations, and invoke the aggregation
155 * walker to process any records. The walker does most of the accounting work
156 * chalking up time spent into the g_cstate_info structure.
157 */
158 int
pt_cpuidle_stat_collect(double interval)159 pt_cpuidle_stat_collect(double interval)
160 {
161 int i;
162 hrtime_t t = 0;
163
164 /*
165 * Assume that all the time spent in this interval will
166 * be the default "0" state. The DTrace walker will reallocate
167 * time out of the default bucket as it processes aggregation
168 * records for time spent in other states.
169 */
170 g_cstate_info[0].total_time = (uint64_t)S2NS(interval *
171 g_ncpus_observed);
172
173 if (dtrace_status(dtp) == -1)
174 return (-1);
175
176 if (dtrace_aggregate_snap(dtp) != 0)
177 pt_error("failed to collect data for %s\n", g_msg_idle_state);
178
179 if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpuidle_dtrace_walk,
180 NULL) != 0)
181 pt_error("failed to sort %s data\n", g_msg_idle_state);
182
183 dtrace_aggregate_clear(dtp);
184
185 /*
186 * Populate g_cstate_info with the correct amount of time spent
187 * in each C state and update the number of C states in g_max_cstate
188 */
189 g_total_c_time = 0;
190 for (i = 0; i < NSTATES; i++) {
191 if (g_cstate_info[i].total_time > 0) {
192 g_total_c_time += g_cstate_info[i].total_time;
193 if (i > g_max_cstate)
194 g_max_cstate = i;
195 if (g_cstate_info[i].last_time > t) {
196 t = g_cstate_info[i].last_time;
197 g_longest_cstate = i;
198 }
199 }
200 }
201
202 return (0);
203 }
204
205 /*
206 * DTrace aggregation walker that sorts through a snapshot of data records
207 * collected during firings of the idle-state-transition probe.
208 *
209 * XXX A way of querying the current idle state for a CPU is needed in addition
210 * to logic similar to that in cpufreq.c
211 */
212 /*ARGSUSED*/
213 static int
pt_cpuidle_dtrace_walk(const dtrace_aggdata_t * data,void * arg)214 pt_cpuidle_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
215 {
216 dtrace_aggdesc_t *aggdesc = data->dtada_desc;
217 dtrace_recdesc_t *rec;
218 uint64_t n = 0, state;
219 int i;
220
221 rec = &aggdesc->dtagd_rec[1];
222
223 switch (g_bit_depth) {
224 case 32:
225 /* LINTED - alignment */
226 state = *(uint32_t *)(data->dtada_data +
227 rec->dtrd_offset);
228 break;
229 case 64:
230 /* LINTED - alignment */
231 state = *(uint64_t *)(data->dtada_data +
232 rec->dtrd_offset);
233 break;
234 }
235
236 if (strcmp(aggdesc->dtagd_name, "number") == 0) {
237 for (i = 0; i < g_ncpus; i++) {
238 /* LINTED - alignment */
239 n += *((uint64_t *)(data->dtada_percpu[i]));
240 }
241 g_total_events += n;
242 g_cstate_info[state].events += n;
243 }
244 else
245 if (strcmp(aggdesc->dtagd_name, "times") == 0) {
246 for (i = 0; i < g_ncpus; i++) {
247 /* LINTED - alignment */
248 n += *((uint64_t *)(data->dtada_percpu[i]));
249 }
250 g_cstate_info[state].last_time = n;
251 g_cstate_info[state].total_time += n;
252 if (g_cstate_info[0].total_time >= n)
253 g_cstate_info[0].total_time -= n;
254 else
255 g_cstate_info[0].total_time = 0;
256 }
257
258 return (DTRACE_AGGWALK_NEXT);
259 }
260