/*
 * Copyright 2009, Intel Corporation
 * Copyright 2009, Sun Microsystems, Inc
 *
 * This file is part of PowerTOP
 *
 * This program file is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program in a file named COPYING; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301 USA
 *
 * Authors:
 *	Arjan van de Ven <arjan@linux.intel.com>
 *	Eric C Saxe <eric.saxe@sun.com>
 *	Aubrey Li <aubrey.li@intel.com>
 */
27b47b5b34SRafael Vanoni
/*
 * GPL Disclaimer
 *
 * For the avoidance of doubt, except that if any license choice other
 * than GPL or LGPL is available it will apply instead, Sun elects to
 * use only the General Public License version 2 (GPLv2) at this time
 * for any software where a choice of GPL license versions is made
 * available with the language indicating that GPLv2 or any later
 * version may be used, or where a choice of which version of the GPL
 * is applied is otherwise unspecified.
 */
39b47b5b34SRafael Vanoni
40b47b5b34SRafael Vanoni #include <stdlib.h>
41b47b5b34SRafael Vanoni #include <string.h>
42b47b5b34SRafael Vanoni #include <dtrace.h>
43b47b5b34SRafael Vanoni #include <kstat.h>
44b47b5b34SRafael Vanoni #include <errno.h>
45b47b5b34SRafael Vanoni #include "powertop.h"
46b47b5b34SRafael Vanoni
/*
 * Scale a frequency down by MICROSEC (presumably Hz -> MHz; MICROSEC is
 * defined outside this file).
 */
#define	HZ2MHZ(speed)	((speed) / MICROSEC)
/* Number of slots allocated for dtp_argv by pt_cpufreq_setup() */
#define	DTP_ARG_COUNT	2
/* Size, in bytes, of each string stored in dtp_argv */
#define	DTP_ARG_LENGTH	5

/* Largest speed seen while enumerating the supported frequencies */
static uint64_t		max_cpufreq = 0;
/* libdtrace handle opened by pt_cpufreq_stat_prepare() */
static dtrace_hdl_t	*dtp;
/* Argument strings handed to dtrace_program_strcompile() */
static char		**dtp_argv;
54b47b5b34SRafael Vanoni
/*
 * Enabling PM through /etc/power.conf
 * See pt_cpufreq_suggest()
 *
 * default_conf is the file read by pt_cpufreq_check_pm(); the remaining
 * strings are shell commands run, in order, by pt_cpufreq_enable().
 */
static char default_conf[]	= "/etc/power.conf";
static char default_pmconf[]	= "/usr/sbin/pmconfig";
static char cpupm_enable[]	= "echo cpupm enable >> /etc/power.conf";
static char cpupm_treshold[]	= "echo cpu-threshold 1s >> /etc/power.conf";
63b47b5b34SRafael Vanoni
/*
 * Buffer containing DTrace program to track CPU frequency transitions.
 *
 * The script keeps one "last" timestamp per CPU in an array sized by the
 * $0 macro argument (presumably filled from dtp_argv[0] - confirm against
 * the libdtrace macro-argument rules). On each cpu-change-speed firing,
 * arg0 is taken as the CPU id and arg1 as the speed being left, and the
 * time elapsed since the CPU's previous firing - or since BEGIN when there
 * was none - is summed into @times[cpu, oldspeed].
 */
static const char *dtp_cpufreq =
"hrtime_t last[$0];"
""
"BEGIN"
"{"
" begin = timestamp;"
"}"
""
":::cpu-change-speed"
"/last[(processorid_t)arg0] != 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);"
" last[this->cpu] = timestamp;"
"}"
":::cpu-change-speed"
"/last[(processorid_t)arg0] == 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
" last[this->cpu] = timestamp;"
"}";
91b47b5b34SRafael Vanoni
/*
 * Same as above, but only for a specific CPU.
 *
 * Both clauses only fire when arg0 equals the $1 macro argument (presumably
 * filled from dtp_argv[1] - confirm), so a single scalar "last" timestamp
 * is enough instead of a per-CPU array.
 */
static const char *dtp_cpufreq_c =
"hrtime_t last;"
""
"BEGIN"
"{"
" begin = timestamp;"
"}"
""
":::cpu-change-speed"
"/(processorid_t)arg0 == $1 &&"
" last != 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - last);"
" last = timestamp;"
"}"
":::cpu-change-speed"
"/(processorid_t)arg0 == $1 &&"
" last == 0/"
"{"
" this->cpu = (processorid_t)arg0;"
" this->oldspeed = (uint64_t)arg1;"
" @times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
" last = timestamp;"
"}";
121b47b5b34SRafael Vanoni
/* Forward declarations of the helpers private to this file */
static int	pt_cpufreq_setup(void);
static int	pt_cpufreq_snapshot(void);
static int	pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *);
static void	pt_cpufreq_stat_account(double, uint_t);
static int	pt_cpufreq_snapshot_cpu(kstat_ctl_t *, uint_t);
static int	pt_cpufreq_check_pm(void);
static void	pt_cpufreq_enable(void);
129b47b5b34SRafael Vanoni
130b47b5b34SRafael Vanoni static int
pt_cpufreq_setup(void)131b47b5b34SRafael Vanoni pt_cpufreq_setup(void)
132b47b5b34SRafael Vanoni {
133b47b5b34SRafael Vanoni if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL)
134*2d83778aSRafael Vanoni return (1);
135b47b5b34SRafael Vanoni
136b47b5b34SRafael Vanoni if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) {
137b47b5b34SRafael Vanoni free(dtp_argv);
138*2d83778aSRafael Vanoni return (1);
139b47b5b34SRafael Vanoni }
140b47b5b34SRafael Vanoni
141b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed);
142b47b5b34SRafael Vanoni
143636423dbSRafael Vanoni if (PT_ON_CPU) {
144b47b5b34SRafael Vanoni if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH))
145b47b5b34SRafael Vanoni == NULL) {
146b47b5b34SRafael Vanoni free(dtp_argv[0]);
147b47b5b34SRafael Vanoni free(dtp_argv);
148*2d83778aSRafael Vanoni return (1);
149b47b5b34SRafael Vanoni }
150b47b5b34SRafael Vanoni (void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu);
151b47b5b34SRafael Vanoni }
152b47b5b34SRafael Vanoni
153b47b5b34SRafael Vanoni return (0);
154b47b5b34SRafael Vanoni }
155b47b5b34SRafael Vanoni
156b47b5b34SRafael Vanoni /*
157b47b5b34SRafael Vanoni * Perform setup necessary to enumerate and track CPU speed changes
158b47b5b34SRafael Vanoni */
159b47b5b34SRafael Vanoni int
pt_cpufreq_stat_prepare(void)160b47b5b34SRafael Vanoni pt_cpufreq_stat_prepare(void)
161b47b5b34SRafael Vanoni {
162b47b5b34SRafael Vanoni dtrace_prog_t *prog;
163b47b5b34SRafael Vanoni dtrace_proginfo_t info;
164b47b5b34SRafael Vanoni dtrace_optval_t statustime;
165b47b5b34SRafael Vanoni kstat_ctl_t *kc;
166b47b5b34SRafael Vanoni kstat_t *ksp;
167b47b5b34SRafael Vanoni kstat_named_t *knp;
168b47b5b34SRafael Vanoni freq_state_info_t *state;
169b47b5b34SRafael Vanoni char *s, *token, *prog_ptr;
170b47b5b34SRafael Vanoni int err;
171b47b5b34SRafael Vanoni
172b47b5b34SRafael Vanoni if ((err = pt_cpufreq_setup()) != 0) {
173*2d83778aSRafael Vanoni pt_error("failed to setup %s report (couldn't allocate "
174*2d83778aSRafael Vanoni "memory)\n", g_msg_freq_state);
175b47b5b34SRafael Vanoni return (errno);
176b47b5b34SRafael Vanoni }
177b47b5b34SRafael Vanoni
178b47b5b34SRafael Vanoni state = g_pstate_info;
179b47b5b34SRafael Vanoni if ((g_cpu_power_states = calloc((size_t)g_ncpus,
180b47b5b34SRafael Vanoni sizeof (cpu_power_info_t))) == NULL)
181b47b5b34SRafael Vanoni return (-1);
182b47b5b34SRafael Vanoni
183b47b5b34SRafael Vanoni /*
184b47b5b34SRafael Vanoni * Enumerate the CPU frequencies
185b47b5b34SRafael Vanoni */
186b47b5b34SRafael Vanoni if ((kc = kstat_open()) == NULL)
187b47b5b34SRafael Vanoni return (errno);
188b47b5b34SRafael Vanoni
189b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL);
190b47b5b34SRafael Vanoni
191b47b5b34SRafael Vanoni if (ksp == NULL) {
192b47b5b34SRafael Vanoni err = errno;
193b47b5b34SRafael Vanoni (void) kstat_close(kc);
194b47b5b34SRafael Vanoni return (err);
195b47b5b34SRafael Vanoni }
196b47b5b34SRafael Vanoni
197b47b5b34SRafael Vanoni (void) kstat_read(kc, ksp, NULL);
198b47b5b34SRafael Vanoni
199b47b5b34SRafael Vanoni knp = kstat_data_lookup(ksp, "supported_frequencies_Hz");
200b47b5b34SRafael Vanoni s = knp->value.str.addr.ptr;
201b47b5b34SRafael Vanoni
202b47b5b34SRafael Vanoni g_npstates = 0;
203b47b5b34SRafael Vanoni
204b47b5b34SRafael Vanoni for (token = strtok(s, ":"), s = NULL;
205*2d83778aSRafael Vanoni token != NULL && g_npstates < NSTATES;
206b47b5b34SRafael Vanoni token = strtok(NULL, ":")) {
207b47b5b34SRafael Vanoni
208b47b5b34SRafael Vanoni state->speed = HZ2MHZ(atoll(token));
209b47b5b34SRafael Vanoni
210b47b5b34SRafael Vanoni if (state->speed > max_cpufreq)
211b47b5b34SRafael Vanoni max_cpufreq = state->speed;
212b47b5b34SRafael Vanoni
213b47b5b34SRafael Vanoni state->total_time = (uint64_t)0;
214b47b5b34SRafael Vanoni
215b47b5b34SRafael Vanoni g_npstates++;
216b47b5b34SRafael Vanoni state++;
217b47b5b34SRafael Vanoni }
218b47b5b34SRafael Vanoni
219b47b5b34SRafael Vanoni if (token != NULL)
220*2d83778aSRafael Vanoni pt_error("CPU exceeds the supported number of %s\n",
221*2d83778aSRafael Vanoni g_msg_freq_state);
222b47b5b34SRafael Vanoni
223b47b5b34SRafael Vanoni (void) kstat_close(kc);
224b47b5b34SRafael Vanoni
225b47b5b34SRafael Vanoni /*
226b47b5b34SRafael Vanoni * Return if speed transition is not supported
227b47b5b34SRafael Vanoni */
228b47b5b34SRafael Vanoni if (g_npstates < 2)
229b47b5b34SRafael Vanoni return (-1);
230b47b5b34SRafael Vanoni
231b47b5b34SRafael Vanoni /*
232b47b5b34SRafael Vanoni * Setup DTrace to look for CPU frequency changes
233b47b5b34SRafael Vanoni */
234b47b5b34SRafael Vanoni if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
235*2d83778aSRafael Vanoni pt_error("cannot open dtrace library for the %s report: %s\n",
236*2d83778aSRafael Vanoni g_msg_freq_state, dtrace_errmsg(NULL, err));
237b47b5b34SRafael Vanoni return (-2);
238b47b5b34SRafael Vanoni }
239b47b5b34SRafael Vanoni
240b47b5b34SRafael Vanoni /*
241b47b5b34SRafael Vanoni * Execute different scripts (defined above) depending on
242b47b5b34SRafael Vanoni * user specified options. Default mode uses dtp_cpufreq.
243b47b5b34SRafael Vanoni */
244636423dbSRafael Vanoni if (PT_ON_CPU)
245b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq_c;
246b47b5b34SRafael Vanoni else
247b47b5b34SRafael Vanoni prog_ptr = (char *)dtp_cpufreq;
248b47b5b34SRafael Vanoni
249b47b5b34SRafael Vanoni if ((prog = dtrace_program_strcompile(dtp, prog_ptr,
250b47b5b34SRafael Vanoni DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) {
251*2d83778aSRafael Vanoni pt_error("failed to compile %s program\n", g_msg_freq_state);
252b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
253b47b5b34SRafael Vanoni }
254b47b5b34SRafael Vanoni
255b47b5b34SRafael Vanoni if (dtrace_program_exec(dtp, prog, &info) == -1) {
256*2d83778aSRafael Vanoni pt_error("failed to enable %s probes\n", g_msg_freq_state);
257b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
258b47b5b34SRafael Vanoni }
259b47b5b34SRafael Vanoni
260*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggsize", "128k") == -1)
261*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggsize'\n", g_msg_freq_state);
262b47b5b34SRafael Vanoni
263*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggrate", "0") == -1)
264*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggrate'\n", g_msg_freq_state);
265b47b5b34SRafael Vanoni
266*2d83778aSRafael Vanoni if (dtrace_setopt(dtp, "aggpercpu", 0) == -1)
267*2d83778aSRafael Vanoni pt_error("failed to set %s 'aggpercpu'\n", g_msg_freq_state);
268b47b5b34SRafael Vanoni
269b47b5b34SRafael Vanoni if (dtrace_go(dtp) != 0) {
270*2d83778aSRafael Vanoni pt_error("failed to start %s observation\n", g_msg_freq_state);
271b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
272b47b5b34SRafael Vanoni }
273b47b5b34SRafael Vanoni
274b47b5b34SRafael Vanoni if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) {
275*2d83778aSRafael Vanoni pt_error("failed to get %s 'statusrate'\n", g_msg_freq_state);
276b47b5b34SRafael Vanoni return (dtrace_errno(dtp));
277b47b5b34SRafael Vanoni }
278b47b5b34SRafael Vanoni
279b47b5b34SRafael Vanoni return (0);
280b47b5b34SRafael Vanoni }
281b47b5b34SRafael Vanoni
282b47b5b34SRafael Vanoni /*
283b47b5b34SRafael Vanoni * The DTrace probes have already been enabled, and are tracking
284b47b5b34SRafael Vanoni * CPU speed transitions. Take a snapshot of the aggregations, and
285b47b5b34SRafael Vanoni * look for any CPUs that have made a speed transition over the last
286b47b5b34SRafael Vanoni * sampling interval. Note that the aggregations may be empty if no
287b47b5b34SRafael Vanoni * speed transitions took place over the last interval. In that case,
288b47b5b34SRafael Vanoni * notate that we have already accounted for the time, so that when
289b47b5b34SRafael Vanoni * we do encounter a speed transition in a future sampling interval
290b47b5b34SRafael Vanoni * we can subtract that time back out.
291b47b5b34SRafael Vanoni */
292b47b5b34SRafael Vanoni int
pt_cpufreq_stat_collect(double interval)293b47b5b34SRafael Vanoni pt_cpufreq_stat_collect(double interval)
294b47b5b34SRafael Vanoni {
295b47b5b34SRafael Vanoni int i, ret;
296b47b5b34SRafael Vanoni
297b47b5b34SRafael Vanoni /*
298b47b5b34SRafael Vanoni * Zero out the interval time reported by DTrace for
299b47b5b34SRafael Vanoni * this interval
300b47b5b34SRafael Vanoni */
301b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++)
302b47b5b34SRafael Vanoni g_pstate_info[i].total_time = 0;
303b47b5b34SRafael Vanoni
304b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus; i++)
305b47b5b34SRafael Vanoni g_cpu_power_states[i].dtrace_time = 0;
306b47b5b34SRafael Vanoni
307b47b5b34SRafael Vanoni if (dtrace_status(dtp) == -1)
308b47b5b34SRafael Vanoni return (-1);
309b47b5b34SRafael Vanoni
310b47b5b34SRafael Vanoni if (dtrace_aggregate_snap(dtp) != 0)
311*2d83778aSRafael Vanoni pt_error("failed to collect data for %s\n", g_msg_freq_state);
312b47b5b34SRafael Vanoni
313b47b5b34SRafael Vanoni if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk,
314b47b5b34SRafael Vanoni NULL) != 0)
315*2d83778aSRafael Vanoni pt_error("failed to sort data for %s\n", g_msg_freq_state);
316b47b5b34SRafael Vanoni
317b47b5b34SRafael Vanoni dtrace_aggregate_clear(dtp);
318b47b5b34SRafael Vanoni
319b47b5b34SRafael Vanoni if ((ret = pt_cpufreq_snapshot()) != 0) {
320*2d83778aSRafael Vanoni pt_error("failed to snapshot %s state\n", g_msg_freq_state);
321b47b5b34SRafael Vanoni return (ret);
322b47b5b34SRafael Vanoni }
323b47b5b34SRafael Vanoni
324b47b5b34SRafael Vanoni switch (g_op_mode) {
325636423dbSRafael Vanoni case PT_MODE_CPU:
326b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, g_observed_cpu);
327b47b5b34SRafael Vanoni break;
328636423dbSRafael Vanoni case PT_MODE_DEFAULT:
329b47b5b34SRafael Vanoni default:
330b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++)
331b47b5b34SRafael Vanoni pt_cpufreq_stat_account(interval, i);
332b47b5b34SRafael Vanoni break;
333b47b5b34SRafael Vanoni }
334b47b5b34SRafael Vanoni
335b47b5b34SRafael Vanoni return (0);
336b47b5b34SRafael Vanoni }
337b47b5b34SRafael Vanoni
338b47b5b34SRafael Vanoni static void
pt_cpufreq_stat_account(double interval,uint_t cpu)339b47b5b34SRafael Vanoni pt_cpufreq_stat_account(double interval, uint_t cpu)
340b47b5b34SRafael Vanoni {
341636423dbSRafael Vanoni cpu_power_info_t *cpu_pow;
342b47b5b34SRafael Vanoni uint64_t speed;
343b47b5b34SRafael Vanoni hrtime_t duration;
344b47b5b34SRafael Vanoni int i;
345b47b5b34SRafael Vanoni
346b47b5b34SRafael Vanoni cpu_pow = &g_cpu_power_states[cpu];
347b47b5b34SRafael Vanoni speed = cpu_pow->current_pstate;
348b47b5b34SRafael Vanoni
349636423dbSRafael Vanoni duration = (hrtime_t)(interval * NANOSEC) - cpu_pow->dtrace_time;
350636423dbSRafael Vanoni
351636423dbSRafael Vanoni /*
352636423dbSRafael Vanoni * 'duration' may be a negative value when we're using or forcing a
353636423dbSRafael Vanoni * small interval, and the amount of time already accounted ends up
354636423dbSRafael Vanoni * being larger than the the former.
355636423dbSRafael Vanoni */
356636423dbSRafael Vanoni if (duration < 0)
357636423dbSRafael Vanoni return;
358b47b5b34SRafael Vanoni
359b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) {
360b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) {
361b47b5b34SRafael Vanoni g_pstate_info[i].total_time += duration;
362b47b5b34SRafael Vanoni cpu_pow->time_accounted += duration;
363636423dbSRafael Vanoni cpu_pow->speed_accounted = speed;
364b47b5b34SRafael Vanoni }
365b47b5b34SRafael Vanoni }
366b47b5b34SRafael Vanoni }
367b47b5b34SRafael Vanoni
368b47b5b34SRafael Vanoni /*
369b47b5b34SRafael Vanoni * Take a snapshot of each CPU's speed by looking through the cpu_info kstats.
370b47b5b34SRafael Vanoni */
371b47b5b34SRafael Vanoni static int
pt_cpufreq_snapshot(void)372b47b5b34SRafael Vanoni pt_cpufreq_snapshot(void)
373b47b5b34SRafael Vanoni {
374b47b5b34SRafael Vanoni kstat_ctl_t *kc;
375b47b5b34SRafael Vanoni int ret;
376b47b5b34SRafael Vanoni uint_t i;
377b47b5b34SRafael Vanoni
378b47b5b34SRafael Vanoni if ((kc = kstat_open()) == NULL)
379b47b5b34SRafael Vanoni return (errno);
380b47b5b34SRafael Vanoni
381b47b5b34SRafael Vanoni switch (g_op_mode) {
382636423dbSRafael Vanoni case PT_MODE_CPU:
383b47b5b34SRafael Vanoni ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu);
384b47b5b34SRafael Vanoni break;
385636423dbSRafael Vanoni case PT_MODE_DEFAULT:
386b47b5b34SRafael Vanoni default:
387b47b5b34SRafael Vanoni for (i = 0; i < g_ncpus_observed; i++)
388b47b5b34SRafael Vanoni if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0)
389b47b5b34SRafael Vanoni break;
390b47b5b34SRafael Vanoni break;
391b47b5b34SRafael Vanoni }
392b47b5b34SRafael Vanoni
393b47b5b34SRafael Vanoni if (kstat_close(kc) != 0)
394*2d83778aSRafael Vanoni pt_error("couldn't close %s kstat\n", g_msg_freq_state);
395b47b5b34SRafael Vanoni
396b47b5b34SRafael Vanoni return (ret);
397b47b5b34SRafael Vanoni }
398b47b5b34SRafael Vanoni
399b47b5b34SRafael Vanoni static int
pt_cpufreq_snapshot_cpu(kstat_ctl_t * kc,uint_t cpu)400b47b5b34SRafael Vanoni pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu)
401b47b5b34SRafael Vanoni {
402b47b5b34SRafael Vanoni kstat_t *ksp;
403b47b5b34SRafael Vanoni kstat_named_t *knp;
404b47b5b34SRafael Vanoni
405b47b5b34SRafael Vanoni ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL);
406b47b5b34SRafael Vanoni if (ksp == NULL) {
407*2d83778aSRafael Vanoni pt_error("couldn't find 'cpu_info' kstat for CPU %d\n while "
408*2d83778aSRafael Vanoni "taking a snapshot of %s\n", cpu, g_msg_freq_state);
409b47b5b34SRafael Vanoni return (1);
410b47b5b34SRafael Vanoni }
411b47b5b34SRafael Vanoni
412b47b5b34SRafael Vanoni if (kstat_read(kc, ksp, NULL) == -1) {
413*2d83778aSRafael Vanoni pt_error("couldn't read 'cpu_info' kstat for CPU %d\n while "
414*2d83778aSRafael Vanoni "taking a snapshot of %s\n", cpu, g_msg_freq_state);
415b47b5b34SRafael Vanoni return (2);
416b47b5b34SRafael Vanoni }
417b47b5b34SRafael Vanoni
418b47b5b34SRafael Vanoni knp = kstat_data_lookup(ksp, "current_clock_Hz");
419b47b5b34SRafael Vanoni if (knp == NULL) {
420*2d83778aSRafael Vanoni pt_error("couldn't find 'current_clock_Hz' kstat for CPU %d "
421*2d83778aSRafael Vanoni "while taking a snapshot of %s\n", cpu, g_msg_freq_state);
422b47b5b34SRafael Vanoni return (3);
423b47b5b34SRafael Vanoni }
424b47b5b34SRafael Vanoni
425b47b5b34SRafael Vanoni g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64);
426b47b5b34SRafael Vanoni
427b47b5b34SRafael Vanoni return (0);
428b47b5b34SRafael Vanoni }
429b47b5b34SRafael Vanoni
430b47b5b34SRafael Vanoni /*
431b47b5b34SRafael Vanoni * DTrace aggregation walker that sorts through a snapshot of the
432b47b5b34SRafael Vanoni * aggregation data collected during firings of the cpu-change-speed
433b47b5b34SRafael Vanoni * probe.
434b47b5b34SRafael Vanoni */
435b47b5b34SRafael Vanoni /*ARGSUSED*/
436b47b5b34SRafael Vanoni static int
pt_cpufreq_dtrace_walk(const dtrace_aggdata_t * data,void * arg)437b47b5b34SRafael Vanoni pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
438b47b5b34SRafael Vanoni {
439b47b5b34SRafael Vanoni dtrace_aggdesc_t *aggdesc = data->dtada_desc;
440b47b5b34SRafael Vanoni dtrace_recdesc_t *cpu_rec, *speed_rec;
441636423dbSRafael Vanoni cpu_power_info_t *cp;
442b47b5b34SRafael Vanoni int32_t cpu;
443b47b5b34SRafael Vanoni uint64_t speed;
444636423dbSRafael Vanoni hrtime_t res;
445b47b5b34SRafael Vanoni int i;
446b47b5b34SRafael Vanoni
447b47b5b34SRafael Vanoni if (strcmp(aggdesc->dtagd_name, "times") == 0) {
448b47b5b34SRafael Vanoni cpu_rec = &aggdesc->dtagd_rec[1];
449b47b5b34SRafael Vanoni speed_rec = &aggdesc->dtagd_rec[2];
450b47b5b34SRafael Vanoni
451b47b5b34SRafael Vanoni /* LINTED - alignment */
452b47b5b34SRafael Vanoni cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset);
453636423dbSRafael Vanoni
454636423dbSRafael Vanoni /* LINTED - alignment */
455636423dbSRafael Vanoni res = *((hrtime_t *)(data->dtada_percpu[cpu]));
456636423dbSRafael Vanoni
457b47b5b34SRafael Vanoni /* LINTED - alignment */
458b47b5b34SRafael Vanoni speed = *(uint64_t *)(data->dtada_data +
459b47b5b34SRafael Vanoni speed_rec->dtrd_offset);
460b47b5b34SRafael Vanoni
461636423dbSRafael Vanoni if (speed == 0)
462b47b5b34SRafael Vanoni speed = max_cpufreq;
463636423dbSRafael Vanoni else
464636423dbSRafael Vanoni speed = HZ2MHZ(speed);
465b47b5b34SRafael Vanoni
466b47b5b34SRafael Vanoni /*
467b47b5b34SRafael Vanoni * We have an aggregation record for "cpu" being at "speed"
468b47b5b34SRafael Vanoni * for an interval of "n" nanoseconds. The reported interval
469b47b5b34SRafael Vanoni * may exceed the powertop sampling interval, since we only
470b47b5b34SRafael Vanoni * notice during potentially infrequent firings of the
471b47b5b34SRafael Vanoni * "speed change" DTrace probe. In this case powertop would
472b47b5b34SRafael Vanoni * have already accounted for the portions of the interval
473b47b5b34SRafael Vanoni * that happened during prior powertop samplings, so subtract
474b47b5b34SRafael Vanoni * out time already accounted.
475b47b5b34SRafael Vanoni */
476636423dbSRafael Vanoni cp = &g_cpu_power_states[cpu];
477b47b5b34SRafael Vanoni
478b47b5b34SRafael Vanoni for (i = 0; i < g_npstates; i++) {
479b47b5b34SRafael Vanoni if (g_pstate_info[i].speed == speed) {
480636423dbSRafael Vanoni
481636423dbSRafael Vanoni if (cp->time_accounted > 0 &&
482636423dbSRafael Vanoni cp->speed_accounted == speed) {
483636423dbSRafael Vanoni if (res > cp->time_accounted) {
484636423dbSRafael Vanoni res -= cp->time_accounted;
485636423dbSRafael Vanoni cp->time_accounted = 0;
486636423dbSRafael Vanoni cp->speed_accounted = 0;
487636423dbSRafael Vanoni } else {
488636423dbSRafael Vanoni return (DTRACE_AGGWALK_NEXT);
489b47b5b34SRafael Vanoni }
490b47b5b34SRafael Vanoni }
491636423dbSRafael Vanoni
492636423dbSRafael Vanoni g_pstate_info[i].total_time += res;
493636423dbSRafael Vanoni cp->dtrace_time += res;
494b47b5b34SRafael Vanoni }
495b47b5b34SRafael Vanoni }
496b47b5b34SRafael Vanoni }
497636423dbSRafael Vanoni
498b47b5b34SRafael Vanoni return (DTRACE_AGGWALK_NEXT);
499b47b5b34SRafael Vanoni }
500b47b5b34SRafael Vanoni
501b47b5b34SRafael Vanoni /*
5029bbf5ba1SRafael Vanoni * Checks if PM is enabled in /etc/power.conf, enabling if not
5039bbf5ba1SRafael Vanoni */
5049bbf5ba1SRafael Vanoni void
pt_cpufreq_suggest(void)5059bbf5ba1SRafael Vanoni pt_cpufreq_suggest(void)
5069bbf5ba1SRafael Vanoni {
5079bbf5ba1SRafael Vanoni int ret = pt_cpufreq_check_pm();
5089bbf5ba1SRafael Vanoni
5099bbf5ba1SRafael Vanoni switch (ret) {
5109bbf5ba1SRafael Vanoni case 0:
5119bbf5ba1SRafael Vanoni pt_sugg_add("Suggestion: enable CPU power management by "
5129bbf5ba1SRafael Vanoni "pressing the P key", 40, 'P', (char *)g_msg_freq_enable,
5139bbf5ba1SRafael Vanoni pt_cpufreq_enable);
5149bbf5ba1SRafael Vanoni break;
5159bbf5ba1SRafael Vanoni }
5169bbf5ba1SRafael Vanoni }
5179bbf5ba1SRafael Vanoni
5189bbf5ba1SRafael Vanoni /*
5199bbf5ba1SRafael Vanoni * Checks /etc/power.conf and returns:
5209bbf5ba1SRafael Vanoni *
5219bbf5ba1SRafael Vanoni * 0 if CPUPM is not enabled
5229bbf5ba1SRafael Vanoni * 1 if there's nothing for us to do because:
5239bbf5ba1SRafael Vanoni * (a) the system does not support frequency scaling
5249bbf5ba1SRafael Vanoni * (b) there's no power.conf.
5259bbf5ba1SRafael Vanoni * 2 if CPUPM is enabled
5269bbf5ba1SRafael Vanoni * 3 if the system is running in poll-mode, as opposed to event-mode
5279bbf5ba1SRafael Vanoni *
5289bbf5ba1SRafael Vanoni * Notice the ordering of the return values, they will be picked up and
5299bbf5ba1SRafael Vanoni * switched upon ascendingly.
5309bbf5ba1SRafael Vanoni */
5319bbf5ba1SRafael Vanoni static int
pt_cpufreq_check_pm(void)5329bbf5ba1SRafael Vanoni pt_cpufreq_check_pm(void)
5339bbf5ba1SRafael Vanoni {
5349bbf5ba1SRafael Vanoni char line[1024];
5359bbf5ba1SRafael Vanoni FILE *file;
5369bbf5ba1SRafael Vanoni int ret = 0;
5379bbf5ba1SRafael Vanoni
5389bbf5ba1SRafael Vanoni if (g_npstates < 2 || (file = fopen(default_conf, "r")) == NULL)
5399bbf5ba1SRafael Vanoni return (1);
5409bbf5ba1SRafael Vanoni
5419bbf5ba1SRafael Vanoni (void) memset(line, 0, 1024);
5429bbf5ba1SRafael Vanoni
5439bbf5ba1SRafael Vanoni while (fgets(line, 1024, file)) {
5449bbf5ba1SRafael Vanoni if (strstr(line, "cpupm")) {
5459bbf5ba1SRafael Vanoni if (strstr(line, "enable")) {
5469bbf5ba1SRafael Vanoni (void) fclose(file);
5479bbf5ba1SRafael Vanoni return (2);
5489bbf5ba1SRafael Vanoni }
5499bbf5ba1SRafael Vanoni }
5509bbf5ba1SRafael Vanoni if (strstr(line, "poll"))
5519bbf5ba1SRafael Vanoni ret = 3;
5529bbf5ba1SRafael Vanoni }
5539bbf5ba1SRafael Vanoni
5549bbf5ba1SRafael Vanoni (void) fclose(file);
5559bbf5ba1SRafael Vanoni
5569bbf5ba1SRafael Vanoni return (ret);
5579bbf5ba1SRafael Vanoni }
5589bbf5ba1SRafael Vanoni
5599bbf5ba1SRafael Vanoni /*
560b47b5b34SRafael Vanoni * Used as a suggestion, sets PM in /etc/power.conf and
561b47b5b34SRafael Vanoni * a 1sec threshold, then calls /usr/sbin/pmconfig
562b47b5b34SRafael Vanoni */
5639bbf5ba1SRafael Vanoni static void
pt_cpufreq_enable(void)5649bbf5ba1SRafael Vanoni pt_cpufreq_enable(void)
565b47b5b34SRafael Vanoni {
566b47b5b34SRafael Vanoni (void) system(cpupm_enable);
567b47b5b34SRafael Vanoni (void) system(cpupm_treshold);
568b47b5b34SRafael Vanoni (void) system(default_pmconf);
569b47b5b34SRafael Vanoni
5709bbf5ba1SRafael Vanoni if (pt_sugg_remove(pt_cpufreq_enable) == 0)
571*2d83778aSRafael Vanoni pt_error("failed to remove a %s suggestion\n",
572*2d83778aSRafael Vanoni g_msg_freq_state);
573b47b5b34SRafael Vanoni }
574