xref: /titanic_50/usr/src/cmd/powertop/common/cpufreq.c (revision 9bbf5ba14ae201f78f3d6b47a9fac96d68649275)
1b47b5b34SRafael Vanoni /*
2b47b5b34SRafael Vanoni  * Copyright 2009, Intel Corporation
3b47b5b34SRafael Vanoni  * Copyright 2009, Sun Microsystems, Inc
4b47b5b34SRafael Vanoni  *
5b47b5b34SRafael Vanoni  * This file is part of PowerTOP
6b47b5b34SRafael Vanoni  *
7b47b5b34SRafael Vanoni  * This program file is free software; you can redistribute it and/or modify it
8b47b5b34SRafael Vanoni  * under the terms of the GNU General Public License as published by the
9b47b5b34SRafael Vanoni  * Free Software Foundation; version 2 of the License.
10b47b5b34SRafael Vanoni  *
11b47b5b34SRafael Vanoni  * This program is distributed in the hope that it will be useful, but WITHOUT
12b47b5b34SRafael Vanoni  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13b47b5b34SRafael Vanoni  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14b47b5b34SRafael Vanoni  * for more details.
15b47b5b34SRafael Vanoni  *
16b47b5b34SRafael Vanoni  * You should have received a copy of the GNU General Public License
17b47b5b34SRafael Vanoni  * along with this program in a file named COPYING; if not, write to the
18b47b5b34SRafael Vanoni  * Free Software Foundation, Inc.,
19b47b5b34SRafael Vanoni  * 51 Franklin Street, Fifth Floor,
20b47b5b34SRafael Vanoni  * Boston, MA 02110-1301 USA
21b47b5b34SRafael Vanoni  *
22b47b5b34SRafael Vanoni  * Authors:
23b47b5b34SRafael Vanoni  *	Arjan van de Ven <arjan@linux.intel.com>
24b47b5b34SRafael Vanoni  *	Eric C Saxe <eric.saxe@sun.com>
25b47b5b34SRafael Vanoni  *	Aubrey Li <aubrey.li@intel.com>
26b47b5b34SRafael Vanoni  */
27b47b5b34SRafael Vanoni 
28b47b5b34SRafael Vanoni /*
29b47b5b34SRafael Vanoni  * GPL Disclaimer
30b47b5b34SRafael Vanoni  *
31b47b5b34SRafael Vanoni  * For the avoidance of doubt, except that if any license choice other
32b47b5b34SRafael Vanoni  * than GPL or LGPL is available it will apply instead, Sun elects to
33b47b5b34SRafael Vanoni  * use only the General Public License version 2 (GPLv2) at this time
34b47b5b34SRafael Vanoni  * for any software where a choice of GPL license versions is made
35b47b5b34SRafael Vanoni  * available with the language indicating that GPLv2 or any later
36b47b5b34SRafael Vanoni  * version may be used, or where a choice of which version of the GPL
37b47b5b34SRafael Vanoni  * is applied is otherwise unspecified.
38b47b5b34SRafael Vanoni  */
39b47b5b34SRafael Vanoni 
40b47b5b34SRafael Vanoni #include <stdlib.h>
41b47b5b34SRafael Vanoni #include <string.h>
42b47b5b34SRafael Vanoni #include <dtrace.h>
43b47b5b34SRafael Vanoni #include <kstat.h>
44b47b5b34SRafael Vanoni #include <errno.h>
45b47b5b34SRafael Vanoni #include "powertop.h"
46b47b5b34SRafael Vanoni 
47636423dbSRafael Vanoni #define	HZ2MHZ(speed)	((speed) / MICROSEC)
48b47b5b34SRafael Vanoni #define	DTP_ARG_COUNT	2
49b47b5b34SRafael Vanoni #define	DTP_ARG_LENGTH	5
50b47b5b34SRafael Vanoni 
51b47b5b34SRafael Vanoni static uint64_t		max_cpufreq = 0;
52b47b5b34SRafael Vanoni static dtrace_hdl_t	*dtp;
53b47b5b34SRafael Vanoni static char		**dtp_argv;
54b47b5b34SRafael Vanoni 
55b47b5b34SRafael Vanoni /*
56b47b5b34SRafael Vanoni  * Enabling PM through /etc/power.conf
57*9bbf5ba1SRafael Vanoni  * See pt_cpufreq_suggest()
58b47b5b34SRafael Vanoni  */
59b47b5b34SRafael Vanoni static char default_conf[]	= "/etc/power.conf";
60b47b5b34SRafael Vanoni static char default_pmconf[]	= "/usr/sbin/pmconfig";
61b47b5b34SRafael Vanoni static char cpupm_enable[]	= "echo cpupm enable >> /etc/power.conf";
62b47b5b34SRafael Vanoni static char cpupm_treshold[]	= "echo cpu-threshold 1s >> /etc/power.conf";
63b47b5b34SRafael Vanoni 
/*
 * D program that tracks CPU frequency transitions on every CPU.
 *
 * $0 sizes the per-CPU "last" array. Each firing of cpu-change-speed adds
 * the time elapsed since the previous firing on that CPU (or since BEGIN,
 * for the first firing) to @times, keyed by CPU and by the speed passed in
 * arg1.
 */
static const char *dtp_cpufreq =
"hrtime_t last[$0];"
/* Record when tracing started. */
"BEGIN"
"{"
"\tbegin = timestamp;"
"}"
/* This CPU has changed speed before: measure from its last change. */
":::cpu-change-speed"
"/last[(processorid_t)arg0] != 0/"
"{"
"\tthis->cpu = (processorid_t)arg0;"
"\tthis->oldspeed = (uint64_t)arg1;"
"\t@times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);"
"\tlast[this->cpu] = timestamp;"
"}"
/* First change seen on this CPU: measure from the start of tracing. */
":::cpu-change-speed"
"/last[(processorid_t)arg0] == 0/"
"{"
"\tthis->cpu = (processorid_t)arg0;"
"\tthis->oldspeed = (uint64_t)arg1;"
"\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
"\tlast[this->cpu] = timestamp;"
"}";
91b47b5b34SRafael Vanoni 
/*
 * D program that tracks CPU frequency transitions on a single CPU only.
 *
 * Works like dtp_cpufreq, except that the probe clauses only fire for the
 * CPU whose id is passed as $1, so one scalar "last" timestamp is enough.
 */
static const char *dtp_cpufreq_c =
"hrtime_t last;"
/* Record when tracing started. */
"BEGIN"
"{"
"\tbegin = timestamp;"
"}"
/* The observed CPU has changed speed before: measure from that change. */
":::cpu-change-speed"
"/(processorid_t)arg0 == $1 &&"
" last != 0/"
"{"
"\tthis->cpu = (processorid_t)arg0;"
"\tthis->oldspeed = (uint64_t)arg1;"
"\t@times[this->cpu, this->oldspeed] = sum(timestamp - last);"
"\tlast = timestamp;"
"}"
/* First change seen on the observed CPU: measure from start of tracing. */
":::cpu-change-speed"
"/(processorid_t)arg0 == $1 &&"
" last == 0/"
"{"
"\tthis->cpu = (processorid_t)arg0;"
"\tthis->oldspeed = (uint64_t)arg1;"
"\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
"\tlast = timestamp;"
"}";
121b47b5b34SRafael Vanoni 
122b47b5b34SRafael Vanoni static int	pt_cpufreq_setup(void);
123b47b5b34SRafael Vanoni static int	pt_cpufreq_snapshot(void);
124b47b5b34SRafael Vanoni static int	pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *);
125b47b5b34SRafael Vanoni static void	pt_cpufreq_stat_account(double, uint_t);
126*9bbf5ba1SRafael Vanoni static int	pt_cpufreq_snapshot_cpu(kstat_ctl_t *, uint_t);
127*9bbf5ba1SRafael Vanoni static int	pt_cpufreq_check_pm(void);
128*9bbf5ba1SRafael Vanoni static void	pt_cpufreq_enable(void);
129b47b5b34SRafael Vanoni 
130b47b5b34SRafael Vanoni static int
131b47b5b34SRafael Vanoni pt_cpufreq_setup(void)
132b47b5b34SRafael Vanoni {
133b47b5b34SRafael Vanoni 	if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL)
134b47b5b34SRafael Vanoni 		return (EXIT_FAILURE);
135b47b5b34SRafael Vanoni 
136b47b5b34SRafael Vanoni 	if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) {
137b47b5b34SRafael Vanoni 		free(dtp_argv);
138b47b5b34SRafael Vanoni 		return (EXIT_FAILURE);
139b47b5b34SRafael Vanoni 	}
140b47b5b34SRafael Vanoni 
141b47b5b34SRafael Vanoni 	(void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed);
142b47b5b34SRafael Vanoni 
143636423dbSRafael Vanoni 	if (PT_ON_CPU) {
144b47b5b34SRafael Vanoni 		if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH))
145b47b5b34SRafael Vanoni 		    == NULL) {
146b47b5b34SRafael Vanoni 			free(dtp_argv[0]);
147b47b5b34SRafael Vanoni 			free(dtp_argv);
148b47b5b34SRafael Vanoni 			return (EXIT_FAILURE);
149b47b5b34SRafael Vanoni 		}
150b47b5b34SRafael Vanoni 		(void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu);
151b47b5b34SRafael Vanoni 	}
152b47b5b34SRafael Vanoni 
153b47b5b34SRafael Vanoni 	return (0);
154b47b5b34SRafael Vanoni }
155b47b5b34SRafael Vanoni 
156b47b5b34SRafael Vanoni /*
157b47b5b34SRafael Vanoni  * Perform setup necessary to enumerate and track CPU speed changes
158b47b5b34SRafael Vanoni  */
159b47b5b34SRafael Vanoni int
160b47b5b34SRafael Vanoni pt_cpufreq_stat_prepare(void)
161b47b5b34SRafael Vanoni {
162b47b5b34SRafael Vanoni 	dtrace_prog_t 		*prog;
163b47b5b34SRafael Vanoni 	dtrace_proginfo_t 	info;
164b47b5b34SRafael Vanoni 	dtrace_optval_t 	statustime;
165b47b5b34SRafael Vanoni 	kstat_ctl_t 		*kc;
166b47b5b34SRafael Vanoni 	kstat_t 		*ksp;
167b47b5b34SRafael Vanoni 	kstat_named_t 		*knp;
168b47b5b34SRafael Vanoni 	freq_state_info_t 	*state;
169b47b5b34SRafael Vanoni 	char 			*s, *token, *prog_ptr;
170b47b5b34SRafael Vanoni 	int 			err;
171b47b5b34SRafael Vanoni 
172b47b5b34SRafael Vanoni 	if ((err = pt_cpufreq_setup()) != 0) {
173b47b5b34SRafael Vanoni 		pt_error("%s : failed to setup", __FILE__);
174b47b5b34SRafael Vanoni 		return (errno);
175b47b5b34SRafael Vanoni 	}
176b47b5b34SRafael Vanoni 
177b47b5b34SRafael Vanoni 	state = g_pstate_info;
178b47b5b34SRafael Vanoni 	if ((g_cpu_power_states = calloc((size_t)g_ncpus,
179b47b5b34SRafael Vanoni 	    sizeof (cpu_power_info_t))) == NULL)
180b47b5b34SRafael Vanoni 		return (-1);
181b47b5b34SRafael Vanoni 
182b47b5b34SRafael Vanoni 	/*
183b47b5b34SRafael Vanoni 	 * Enumerate the CPU frequencies
184b47b5b34SRafael Vanoni 	 */
185b47b5b34SRafael Vanoni 	if ((kc = kstat_open()) == NULL)
186b47b5b34SRafael Vanoni 		return (errno);
187b47b5b34SRafael Vanoni 
188b47b5b34SRafael Vanoni 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL);
189b47b5b34SRafael Vanoni 
190b47b5b34SRafael Vanoni 	if (ksp == NULL) {
191b47b5b34SRafael Vanoni 		err = errno;
192b47b5b34SRafael Vanoni 		(void) kstat_close(kc);
193b47b5b34SRafael Vanoni 		return (err);
194b47b5b34SRafael Vanoni 	}
195b47b5b34SRafael Vanoni 
196b47b5b34SRafael Vanoni 	(void) kstat_read(kc, ksp, NULL);
197b47b5b34SRafael Vanoni 
198b47b5b34SRafael Vanoni 	knp = kstat_data_lookup(ksp, "supported_frequencies_Hz");
199b47b5b34SRafael Vanoni 	s = knp->value.str.addr.ptr;
200b47b5b34SRafael Vanoni 
201b47b5b34SRafael Vanoni 	g_npstates = 0;
202b47b5b34SRafael Vanoni 
203b47b5b34SRafael Vanoni 	for (token = strtok(s, ":"), s = NULL;
204b47b5b34SRafael Vanoni 	    NULL != token && g_npstates < NSTATES;
205b47b5b34SRafael Vanoni 	    token = strtok(NULL, ":")) {
206b47b5b34SRafael Vanoni 
207b47b5b34SRafael Vanoni 		state->speed = HZ2MHZ(atoll(token));
208b47b5b34SRafael Vanoni 
209b47b5b34SRafael Vanoni 		if (state->speed > max_cpufreq)
210b47b5b34SRafael Vanoni 			max_cpufreq = state->speed;
211b47b5b34SRafael Vanoni 
212b47b5b34SRafael Vanoni 		state->total_time = (uint64_t)0;
213b47b5b34SRafael Vanoni 
214b47b5b34SRafael Vanoni 		g_npstates++;
215b47b5b34SRafael Vanoni 		state++;
216b47b5b34SRafael Vanoni 	}
217b47b5b34SRafael Vanoni 
218b47b5b34SRafael Vanoni 	if (token != NULL)
219b47b5b34SRafael Vanoni 		pt_error("%s : exceeded NSTATES\n", __FILE__);
220b47b5b34SRafael Vanoni 
221b47b5b34SRafael Vanoni 	(void) kstat_close(kc);
222b47b5b34SRafael Vanoni 
223b47b5b34SRafael Vanoni 	/*
224b47b5b34SRafael Vanoni 	 * Return if speed transition is not supported
225b47b5b34SRafael Vanoni 	 */
226b47b5b34SRafael Vanoni 	if (g_npstates < 2)
227b47b5b34SRafael Vanoni 		return (-1);
228b47b5b34SRafael Vanoni 
229b47b5b34SRafael Vanoni 	/*
230b47b5b34SRafael Vanoni 	 * Setup DTrace to look for CPU frequency changes
231b47b5b34SRafael Vanoni 	 */
232b47b5b34SRafael Vanoni 	if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
233b47b5b34SRafael Vanoni 		pt_error("%s : cannot open dtrace library: %s\n", __FILE__,
234b47b5b34SRafael Vanoni 		    dtrace_errmsg(NULL, err));
235b47b5b34SRafael Vanoni 		return (-2);
236b47b5b34SRafael Vanoni 	}
237b47b5b34SRafael Vanoni 
238b47b5b34SRafael Vanoni 	/*
239b47b5b34SRafael Vanoni 	 * Execute different scripts (defined above) depending on
240b47b5b34SRafael Vanoni 	 * user specified options. Default mode uses dtp_cpufreq.
241b47b5b34SRafael Vanoni 	 */
242636423dbSRafael Vanoni 	if (PT_ON_CPU)
243b47b5b34SRafael Vanoni 		prog_ptr = (char *)dtp_cpufreq_c;
244b47b5b34SRafael Vanoni 	else
245b47b5b34SRafael Vanoni 		prog_ptr = (char *)dtp_cpufreq;
246b47b5b34SRafael Vanoni 
247b47b5b34SRafael Vanoni 	if ((prog = dtrace_program_strcompile(dtp, prog_ptr,
248b47b5b34SRafael Vanoni 	    DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) {
249b47b5b34SRafael Vanoni 		pt_error("%s : cpu-change-speed probe unavailable\n", __FILE__);
250b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
251b47b5b34SRafael Vanoni 	}
252b47b5b34SRafael Vanoni 
253b47b5b34SRafael Vanoni 	if (dtrace_program_exec(dtp, prog, &info) == -1) {
254b47b5b34SRafael Vanoni 		pt_error("%s : failed to enable speed probe\n", __FILE__);
255b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
256b47b5b34SRafael Vanoni 	}
257b47b5b34SRafael Vanoni 
258b47b5b34SRafael Vanoni 	if (dtrace_setopt(dtp, "aggsize", "128k") == -1) {
259b47b5b34SRafael Vanoni 		pt_error("%s : failed to set speed 'aggsize'\n", __FILE__);
260b47b5b34SRafael Vanoni 	}
261b47b5b34SRafael Vanoni 
262b47b5b34SRafael Vanoni 	if (dtrace_setopt(dtp, "aggrate", "0") == -1) {
263b47b5b34SRafael Vanoni 		pt_error("%s : failed to set speed 'aggrate'\n", __FILE__);
264b47b5b34SRafael Vanoni 	}
265b47b5b34SRafael Vanoni 
266b47b5b34SRafael Vanoni 	if (dtrace_setopt(dtp, "aggpercpu", 0) == -1) {
267b47b5b34SRafael Vanoni 		pt_error("%s : failed to set speed 'aggpercpu'\n", __FILE__);
268b47b5b34SRafael Vanoni 	}
269b47b5b34SRafael Vanoni 
270b47b5b34SRafael Vanoni 	if (dtrace_go(dtp) != 0) {
271b47b5b34SRafael Vanoni 		pt_error("%s : failed to start speed observation", __FILE__);
272b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
273b47b5b34SRafael Vanoni 	}
274b47b5b34SRafael Vanoni 
275b47b5b34SRafael Vanoni 	if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) {
276b47b5b34SRafael Vanoni 		pt_error("%s : failed to get speed 'statusrate'\n", __FILE__);
277b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
278b47b5b34SRafael Vanoni 	}
279b47b5b34SRafael Vanoni 
280b47b5b34SRafael Vanoni 	return (0);
281b47b5b34SRafael Vanoni }
282b47b5b34SRafael Vanoni 
283b47b5b34SRafael Vanoni /*
284b47b5b34SRafael Vanoni  * The DTrace probes have already been enabled, and are tracking
285b47b5b34SRafael Vanoni  * CPU speed transitions. Take a snapshot of the aggregations, and
286b47b5b34SRafael Vanoni  * look for any CPUs that have made a speed transition over the last
287b47b5b34SRafael Vanoni  * sampling interval. Note that the aggregations may be empty if no
288b47b5b34SRafael Vanoni  * speed transitions took place over the last interval. In that case,
289b47b5b34SRafael Vanoni  * notate that we have already accounted for the time, so that when
290b47b5b34SRafael Vanoni  * we do encounter a speed transition in a future sampling interval
291b47b5b34SRafael Vanoni  * we can subtract that time back out.
292b47b5b34SRafael Vanoni  */
293b47b5b34SRafael Vanoni int
294b47b5b34SRafael Vanoni pt_cpufreq_stat_collect(double interval)
295b47b5b34SRafael Vanoni {
296b47b5b34SRafael Vanoni 	int i, ret;
297b47b5b34SRafael Vanoni 
298b47b5b34SRafael Vanoni 	/*
299b47b5b34SRafael Vanoni 	 * Zero out the interval time reported by DTrace for
300b47b5b34SRafael Vanoni 	 * this interval
301b47b5b34SRafael Vanoni 	 */
302b47b5b34SRafael Vanoni 	for (i = 0; i < g_npstates; i++)
303b47b5b34SRafael Vanoni 		g_pstate_info[i].total_time = 0;
304b47b5b34SRafael Vanoni 
305b47b5b34SRafael Vanoni 	for (i = 0; i < g_ncpus; i++)
306b47b5b34SRafael Vanoni 		g_cpu_power_states[i].dtrace_time = 0;
307b47b5b34SRafael Vanoni 
308b47b5b34SRafael Vanoni 	if (dtrace_status(dtp) == -1)
309b47b5b34SRafael Vanoni 		return (-1);
310b47b5b34SRafael Vanoni 
311b47b5b34SRafael Vanoni 	if (dtrace_aggregate_snap(dtp) != 0)
312b47b5b34SRafael Vanoni 		pt_error("%s : failed to add to stats aggregation", __FILE__);
313b47b5b34SRafael Vanoni 
314b47b5b34SRafael Vanoni 	if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk,
315b47b5b34SRafael Vanoni 	    NULL) != 0)
316b47b5b34SRafael Vanoni 		pt_error("%s : failed to sort stats aggregation", __FILE__);
317b47b5b34SRafael Vanoni 
318b47b5b34SRafael Vanoni 	dtrace_aggregate_clear(dtp);
319b47b5b34SRafael Vanoni 
320b47b5b34SRafael Vanoni 	if ((ret = pt_cpufreq_snapshot()) != 0) {
321b47b5b34SRafael Vanoni 		pt_error("%s : failed to add to stats aggregation", __FILE__);
322b47b5b34SRafael Vanoni 		return (ret);
323b47b5b34SRafael Vanoni 	}
324b47b5b34SRafael Vanoni 
325b47b5b34SRafael Vanoni 	switch (g_op_mode) {
326636423dbSRafael Vanoni 	case PT_MODE_CPU:
327b47b5b34SRafael Vanoni 		pt_cpufreq_stat_account(interval, g_observed_cpu);
328b47b5b34SRafael Vanoni 		break;
329636423dbSRafael Vanoni 	case PT_MODE_DEFAULT:
330b47b5b34SRafael Vanoni 	default:
331b47b5b34SRafael Vanoni 		for (i = 0; i < g_ncpus_observed; i++)
332b47b5b34SRafael Vanoni 			pt_cpufreq_stat_account(interval, i);
333b47b5b34SRafael Vanoni 		break;
334b47b5b34SRafael Vanoni 	}
335b47b5b34SRafael Vanoni 
336b47b5b34SRafael Vanoni 	return (0);
337b47b5b34SRafael Vanoni }
338b47b5b34SRafael Vanoni 
339b47b5b34SRafael Vanoni static void
340b47b5b34SRafael Vanoni pt_cpufreq_stat_account(double interval, uint_t cpu)
341b47b5b34SRafael Vanoni {
342636423dbSRafael Vanoni 	cpu_power_info_t 	*cpu_pow;
343b47b5b34SRafael Vanoni 	uint64_t 		speed;
344b47b5b34SRafael Vanoni 	hrtime_t 		duration;
345b47b5b34SRafael Vanoni 	int			i;
346b47b5b34SRafael Vanoni 
347b47b5b34SRafael Vanoni 	cpu_pow = &g_cpu_power_states[cpu];
348b47b5b34SRafael Vanoni 	speed = cpu_pow->current_pstate;
349b47b5b34SRafael Vanoni 
350636423dbSRafael Vanoni 	duration = (hrtime_t)(interval * NANOSEC) - cpu_pow->dtrace_time;
351636423dbSRafael Vanoni 
352636423dbSRafael Vanoni 	/*
353636423dbSRafael Vanoni 	 * 'duration' may be a negative value when we're using or forcing a
354636423dbSRafael Vanoni 	 * small interval, and the amount of time already accounted ends up
355636423dbSRafael Vanoni 	 * being larger than the the former.
356636423dbSRafael Vanoni 	 */
357636423dbSRafael Vanoni 	if (duration < 0)
358636423dbSRafael Vanoni 		return;
359b47b5b34SRafael Vanoni 
360b47b5b34SRafael Vanoni 	for (i = 0; i < g_npstates; i++) {
361b47b5b34SRafael Vanoni 		if (g_pstate_info[i].speed == speed) {
362b47b5b34SRafael Vanoni 			g_pstate_info[i].total_time += duration;
363b47b5b34SRafael Vanoni 			cpu_pow->time_accounted += duration;
364636423dbSRafael Vanoni 			cpu_pow->speed_accounted = speed;
365b47b5b34SRafael Vanoni 		}
366b47b5b34SRafael Vanoni 	}
367b47b5b34SRafael Vanoni }
368b47b5b34SRafael Vanoni 
369b47b5b34SRafael Vanoni /*
370b47b5b34SRafael Vanoni  * Take a snapshot of each CPU's speed by looking through the cpu_info kstats.
371b47b5b34SRafael Vanoni  */
372b47b5b34SRafael Vanoni static int
373b47b5b34SRafael Vanoni pt_cpufreq_snapshot(void)
374b47b5b34SRafael Vanoni {
375b47b5b34SRafael Vanoni 	kstat_ctl_t 	*kc;
376b47b5b34SRafael Vanoni 	int 		ret;
377b47b5b34SRafael Vanoni 	uint_t		i;
378b47b5b34SRafael Vanoni 
379b47b5b34SRafael Vanoni 	if ((kc = kstat_open()) == NULL)
380b47b5b34SRafael Vanoni 		return (errno);
381b47b5b34SRafael Vanoni 
382b47b5b34SRafael Vanoni 	switch (g_op_mode) {
383636423dbSRafael Vanoni 	case PT_MODE_CPU:
384b47b5b34SRafael Vanoni 		ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu);
385b47b5b34SRafael Vanoni 		break;
386636423dbSRafael Vanoni 	case PT_MODE_DEFAULT:
387b47b5b34SRafael Vanoni 	default:
388b47b5b34SRafael Vanoni 		for (i = 0; i < g_ncpus_observed; i++)
389b47b5b34SRafael Vanoni 			if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0)
390b47b5b34SRafael Vanoni 				break;
391b47b5b34SRafael Vanoni 		break;
392b47b5b34SRafael Vanoni 	}
393b47b5b34SRafael Vanoni 
394b47b5b34SRafael Vanoni 	if (kstat_close(kc) != 0)
395b47b5b34SRafael Vanoni 		pt_error("%s : couldn't close kstat\n", __FILE__);
396b47b5b34SRafael Vanoni 
397b47b5b34SRafael Vanoni 	return (ret);
398b47b5b34SRafael Vanoni }
399b47b5b34SRafael Vanoni 
400b47b5b34SRafael Vanoni static int
401b47b5b34SRafael Vanoni pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu)
402b47b5b34SRafael Vanoni {
403b47b5b34SRafael Vanoni 	kstat_t 		*ksp;
404b47b5b34SRafael Vanoni 	kstat_named_t 		*knp;
405b47b5b34SRafael Vanoni 
406b47b5b34SRafael Vanoni 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL);
407b47b5b34SRafael Vanoni 	if (ksp == NULL) {
408b47b5b34SRafael Vanoni 		pt_error("%s : couldn't find cpu_info kstat for CPU "
409b47b5b34SRafael Vanoni 		"%d\n", __FILE__, cpu);
410b47b5b34SRafael Vanoni 		return (1);
411b47b5b34SRafael Vanoni 	}
412b47b5b34SRafael Vanoni 
413b47b5b34SRafael Vanoni 	if (kstat_read(kc, ksp, NULL) == -1) {
414b47b5b34SRafael Vanoni 		pt_error("%s : couldn't read cpu_info kstat for "
415b47b5b34SRafael Vanoni 		    "CPU %d\n", __FILE__, cpu);
416b47b5b34SRafael Vanoni 		return (2);
417b47b5b34SRafael Vanoni 	}
418b47b5b34SRafael Vanoni 
419b47b5b34SRafael Vanoni 	knp = kstat_data_lookup(ksp, "current_clock_Hz");
420b47b5b34SRafael Vanoni 	if (knp == NULL) {
421b47b5b34SRafael Vanoni 		pt_error("%s : couldn't find current_clock_Hz "
422b47b5b34SRafael Vanoni 		    "kstat for CPU %d\n", __FILE__, cpu);
423b47b5b34SRafael Vanoni 		return (3);
424b47b5b34SRafael Vanoni 	}
425b47b5b34SRafael Vanoni 
426b47b5b34SRafael Vanoni 	g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64);
427b47b5b34SRafael Vanoni 
428b47b5b34SRafael Vanoni 	return (0);
429b47b5b34SRafael Vanoni }
430b47b5b34SRafael Vanoni 
431b47b5b34SRafael Vanoni /*
432b47b5b34SRafael Vanoni  * DTrace aggregation walker that sorts through a snapshot of the
433b47b5b34SRafael Vanoni  * aggregation data collected during firings of the cpu-change-speed
434b47b5b34SRafael Vanoni  * probe.
435b47b5b34SRafael Vanoni  */
436b47b5b34SRafael Vanoni /*ARGSUSED*/
437b47b5b34SRafael Vanoni static int
438b47b5b34SRafael Vanoni pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
439b47b5b34SRafael Vanoni {
440b47b5b34SRafael Vanoni 	dtrace_aggdesc_t 	*aggdesc = data->dtada_desc;
441b47b5b34SRafael Vanoni 	dtrace_recdesc_t 	*cpu_rec, *speed_rec;
442636423dbSRafael Vanoni 	cpu_power_info_t 	*cp;
443b47b5b34SRafael Vanoni 	int32_t 		cpu;
444b47b5b34SRafael Vanoni 	uint64_t 		speed;
445636423dbSRafael Vanoni 	hrtime_t 		res;
446b47b5b34SRafael Vanoni 	int 			i;
447b47b5b34SRafael Vanoni 
448b47b5b34SRafael Vanoni 	if (strcmp(aggdesc->dtagd_name, "times") == 0) {
449b47b5b34SRafael Vanoni 		cpu_rec = &aggdesc->dtagd_rec[1];
450b47b5b34SRafael Vanoni 		speed_rec = &aggdesc->dtagd_rec[2];
451b47b5b34SRafael Vanoni 
452b47b5b34SRafael Vanoni 		/* LINTED - alignment */
453b47b5b34SRafael Vanoni 		cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset);
454636423dbSRafael Vanoni 
455636423dbSRafael Vanoni 		/* LINTED - alignment */
456636423dbSRafael Vanoni 		res = *((hrtime_t *)(data->dtada_percpu[cpu]));
457636423dbSRafael Vanoni 
458b47b5b34SRafael Vanoni 		/* LINTED - alignment */
459b47b5b34SRafael Vanoni 		speed = *(uint64_t *)(data->dtada_data +
460b47b5b34SRafael Vanoni 		    speed_rec->dtrd_offset);
461b47b5b34SRafael Vanoni 
462636423dbSRafael Vanoni 		if (speed == 0)
463b47b5b34SRafael Vanoni 			speed = max_cpufreq;
464636423dbSRafael Vanoni 		else
465636423dbSRafael Vanoni 			speed = HZ2MHZ(speed);
466b47b5b34SRafael Vanoni 
467b47b5b34SRafael Vanoni 		/*
468b47b5b34SRafael Vanoni 		 * We have an aggregation record for "cpu" being at "speed"
469b47b5b34SRafael Vanoni 		 * for an interval of "n" nanoseconds. The reported interval
470b47b5b34SRafael Vanoni 		 * may exceed the powertop sampling interval, since we only
471b47b5b34SRafael Vanoni 		 * notice during potentially infrequent firings of the
472b47b5b34SRafael Vanoni 		 * "speed change" DTrace probe. In this case powertop would
473b47b5b34SRafael Vanoni 		 * have already accounted for the portions of the interval
474b47b5b34SRafael Vanoni 		 * that happened during prior powertop samplings, so subtract
475b47b5b34SRafael Vanoni 		 * out time already accounted.
476b47b5b34SRafael Vanoni 		 */
477636423dbSRafael Vanoni 		cp = &g_cpu_power_states[cpu];
478b47b5b34SRafael Vanoni 
479b47b5b34SRafael Vanoni 		for (i = 0; i < g_npstates; i++) {
480b47b5b34SRafael Vanoni 			if (g_pstate_info[i].speed == speed) {
481636423dbSRafael Vanoni 
482636423dbSRafael Vanoni 				if (cp->time_accounted > 0 &&
483636423dbSRafael Vanoni 				    cp->speed_accounted == speed) {
484636423dbSRafael Vanoni 					if (res > cp->time_accounted) {
485636423dbSRafael Vanoni 						res -= cp->time_accounted;
486636423dbSRafael Vanoni 						cp->time_accounted = 0;
487636423dbSRafael Vanoni 						cp->speed_accounted = 0;
488636423dbSRafael Vanoni 					} else {
489636423dbSRafael Vanoni 						return (DTRACE_AGGWALK_NEXT);
490b47b5b34SRafael Vanoni 					}
491b47b5b34SRafael Vanoni 				}
492636423dbSRafael Vanoni 
493636423dbSRafael Vanoni 				g_pstate_info[i].total_time += res;
494636423dbSRafael Vanoni 				cp->dtrace_time += res;
495b47b5b34SRafael Vanoni 			}
496b47b5b34SRafael Vanoni 		}
497b47b5b34SRafael Vanoni 	}
498636423dbSRafael Vanoni 
499b47b5b34SRafael Vanoni 	return (DTRACE_AGGWALK_NEXT);
500b47b5b34SRafael Vanoni }
501b47b5b34SRafael Vanoni 
502b47b5b34SRafael Vanoni /*
503*9bbf5ba1SRafael Vanoni  * Checks if PM is enabled in /etc/power.conf, enabling if not
504*9bbf5ba1SRafael Vanoni  */
505*9bbf5ba1SRafael Vanoni void
506*9bbf5ba1SRafael Vanoni pt_cpufreq_suggest(void)
507*9bbf5ba1SRafael Vanoni {
508*9bbf5ba1SRafael Vanoni 	int ret = pt_cpufreq_check_pm();
509*9bbf5ba1SRafael Vanoni 
510*9bbf5ba1SRafael Vanoni 	switch (ret) {
511*9bbf5ba1SRafael Vanoni 	case 0:
512*9bbf5ba1SRafael Vanoni 		pt_sugg_add("Suggestion: enable CPU power management by "
513*9bbf5ba1SRafael Vanoni 		    "pressing the P key", 40, 'P', (char *)g_msg_freq_enable,
514*9bbf5ba1SRafael Vanoni 		    pt_cpufreq_enable);
515*9bbf5ba1SRafael Vanoni 		break;
516*9bbf5ba1SRafael Vanoni 	}
517*9bbf5ba1SRafael Vanoni }
518*9bbf5ba1SRafael Vanoni 
519*9bbf5ba1SRafael Vanoni /*
520*9bbf5ba1SRafael Vanoni  * Checks /etc/power.conf and returns:
521*9bbf5ba1SRafael Vanoni  *
522*9bbf5ba1SRafael Vanoni  *     0 if CPUPM is not enabled
523*9bbf5ba1SRafael Vanoni  *     1 if there's nothing for us to do because:
524*9bbf5ba1SRafael Vanoni  *         (a) the system does not support frequency scaling
525*9bbf5ba1SRafael Vanoni  *         (b) there's no power.conf.
526*9bbf5ba1SRafael Vanoni  *     2 if CPUPM is enabled
527*9bbf5ba1SRafael Vanoni  *     3 if the system is running in poll-mode, as opposed to event-mode
528*9bbf5ba1SRafael Vanoni  *
529*9bbf5ba1SRafael Vanoni  * Notice the ordering of the return values, they will be picked up and
530*9bbf5ba1SRafael Vanoni  * switched upon ascendingly.
531*9bbf5ba1SRafael Vanoni  */
532*9bbf5ba1SRafael Vanoni static int
533*9bbf5ba1SRafael Vanoni pt_cpufreq_check_pm(void)
534*9bbf5ba1SRafael Vanoni {
535*9bbf5ba1SRafael Vanoni 	char line[1024];
536*9bbf5ba1SRafael Vanoni 	FILE *file;
537*9bbf5ba1SRafael Vanoni 	int ret = 0;
538*9bbf5ba1SRafael Vanoni 
539*9bbf5ba1SRafael Vanoni 	if (g_npstates < 2 || (file = fopen(default_conf, "r")) == NULL)
540*9bbf5ba1SRafael Vanoni 		return (1);
541*9bbf5ba1SRafael Vanoni 
542*9bbf5ba1SRafael Vanoni 	(void) memset(line, 0, 1024);
543*9bbf5ba1SRafael Vanoni 
544*9bbf5ba1SRafael Vanoni 	while (fgets(line, 1024, file)) {
545*9bbf5ba1SRafael Vanoni 		if (strstr(line, "cpupm")) {
546*9bbf5ba1SRafael Vanoni 			if (strstr(line, "enable")) {
547*9bbf5ba1SRafael Vanoni 				(void) fclose(file);
548*9bbf5ba1SRafael Vanoni 				return (2);
549*9bbf5ba1SRafael Vanoni 			}
550*9bbf5ba1SRafael Vanoni 		}
551*9bbf5ba1SRafael Vanoni 		if (strstr(line, "poll"))
552*9bbf5ba1SRafael Vanoni 			ret = 3;
553*9bbf5ba1SRafael Vanoni 	}
554*9bbf5ba1SRafael Vanoni 
555*9bbf5ba1SRafael Vanoni 	(void) fclose(file);
556*9bbf5ba1SRafael Vanoni 
557*9bbf5ba1SRafael Vanoni 	return (ret);
558*9bbf5ba1SRafael Vanoni }
559*9bbf5ba1SRafael Vanoni 
560*9bbf5ba1SRafael Vanoni /*
561b47b5b34SRafael Vanoni  * Used as a suggestion, sets PM in /etc/power.conf and
562b47b5b34SRafael Vanoni  * a 1sec threshold, then calls /usr/sbin/pmconfig
563b47b5b34SRafael Vanoni  */
564*9bbf5ba1SRafael Vanoni static void
565*9bbf5ba1SRafael Vanoni pt_cpufreq_enable(void)
566b47b5b34SRafael Vanoni {
567b47b5b34SRafael Vanoni 	(void) system(cpupm_enable);
568b47b5b34SRafael Vanoni 	(void) system(cpupm_treshold);
569b47b5b34SRafael Vanoni 	(void) system(default_pmconf);
570b47b5b34SRafael Vanoni 
571*9bbf5ba1SRafael Vanoni 	if (pt_sugg_remove(pt_cpufreq_enable) == 0)
572*9bbf5ba1SRafael Vanoni 		pt_error("%s : failed to remove a sugg.\n", __FILE__);
573b47b5b34SRafael Vanoni }
574