xref: /titanic_53/usr/src/cmd/powertop/common/cpufreq.c (revision b47b5b34b42fa8056577c43496cdb99a4c99f8d7)
1*b47b5b34SRafael Vanoni /*
2*b47b5b34SRafael Vanoni  * Copyright 2009, Intel Corporation
3*b47b5b34SRafael Vanoni  * Copyright 2009, Sun Microsystems, Inc
4*b47b5b34SRafael Vanoni  *
5*b47b5b34SRafael Vanoni  * This file is part of PowerTOP
6*b47b5b34SRafael Vanoni  *
7*b47b5b34SRafael Vanoni  * This program file is free software; you can redistribute it and/or modify it
8*b47b5b34SRafael Vanoni  * under the terms of the GNU General Public License as published by the
9*b47b5b34SRafael Vanoni  * Free Software Foundation; version 2 of the License.
10*b47b5b34SRafael Vanoni  *
11*b47b5b34SRafael Vanoni  * This program is distributed in the hope that it will be useful, but WITHOUT
12*b47b5b34SRafael Vanoni  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*b47b5b34SRafael Vanoni  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14*b47b5b34SRafael Vanoni  * for more details.
15*b47b5b34SRafael Vanoni  *
16*b47b5b34SRafael Vanoni  * You should have received a copy of the GNU General Public License
17*b47b5b34SRafael Vanoni  * along with this program in a file named COPYING; if not, write to the
18*b47b5b34SRafael Vanoni  * Free Software Foundation, Inc.,
19*b47b5b34SRafael Vanoni  * 51 Franklin Street, Fifth Floor,
20*b47b5b34SRafael Vanoni  * Boston, MA 02110-1301 USA
21*b47b5b34SRafael Vanoni  *
22*b47b5b34SRafael Vanoni  * Authors:
23*b47b5b34SRafael Vanoni  *	Arjan van de Ven <arjan@linux.intel.com>
24*b47b5b34SRafael Vanoni  *	Eric C Saxe <eric.saxe@sun.com>
25*b47b5b34SRafael Vanoni  *	Aubrey Li <aubrey.li@intel.com>
26*b47b5b34SRafael Vanoni  */
27*b47b5b34SRafael Vanoni 
28*b47b5b34SRafael Vanoni /*
29*b47b5b34SRafael Vanoni  * GPL Disclaimer
30*b47b5b34SRafael Vanoni  *
31*b47b5b34SRafael Vanoni  * For the avoidance of doubt, except that if any license choice other
32*b47b5b34SRafael Vanoni  * than GPL or LGPL is available it will apply instead, Sun elects to
33*b47b5b34SRafael Vanoni  * use only the General Public License version 2 (GPLv2) at this time
34*b47b5b34SRafael Vanoni  * for any software where a choice of GPL license versions is made
35*b47b5b34SRafael Vanoni  * available with the language indicating that GPLv2 or any later
36*b47b5b34SRafael Vanoni  * version may be used, or where a choice of which version of the GPL
37*b47b5b34SRafael Vanoni  * is applied is otherwise unspecified.
38*b47b5b34SRafael Vanoni  */
39*b47b5b34SRafael Vanoni 
40*b47b5b34SRafael Vanoni #include <stdlib.h>
41*b47b5b34SRafael Vanoni #include <string.h>
42*b47b5b34SRafael Vanoni #include <dtrace.h>
43*b47b5b34SRafael Vanoni #include <kstat.h>
44*b47b5b34SRafael Vanoni #include <errno.h>
45*b47b5b34SRafael Vanoni #include "powertop.h"
46*b47b5b34SRafael Vanoni 
/* Convert a frequency given in Hz to MHz (integer division when integral). */
#define	HZ2MHZ(speed)	((speed) / 1000000)

/* Number of slots in the argument vector handed to the D compiler. */
#define	DTP_ARG_COUNT	2

/* Size in bytes of each argument string buffer, terminator included. */
#define	DTP_ARG_LENGTH	5
50*b47b5b34SRafael Vanoni 
51*b47b5b34SRafael Vanoni static uint64_t		max_cpufreq = 0;
52*b47b5b34SRafael Vanoni static dtrace_hdl_t	*dtp;
53*b47b5b34SRafael Vanoni static char		**dtp_argv;
54*b47b5b34SRafael Vanoni 
/*
 * Strings used to enable CPU power management through /etc/power.conf.
 * default_conf is the file scanned by suggest_p_state(); the remaining
 * three are shell command lines run by enable_p_state(). None of them is
 * ever modified, so they are const-qualified.
 */
static const char default_conf[]	= "/etc/power.conf";
static const char default_pmconf[]	= "/usr/sbin/pmconfig";
static const char cpupm_enable[]	=
	" echo cpupm enable >> /etc/power.conf";
static const char cpupm_treshold[]	=
	" echo cpu-threshold 1s >> /etc/power.conf";
63*b47b5b34SRafael Vanoni 
/*
 * D program tracking CPU frequency transitions on every CPU. $0 sizes the
 * per-CPU "last" timestamp array. Each firing of cpu-change-speed adds,
 * keyed by (cpu, speed in MHz taken from arg1), the time elapsed since that
 * CPU's previous firing, or since BEGIN on its first firing, to the "times"
 * aggregation.
 */
static const char dtp_cpufreq[] =
	"hrtime_t last[$0];"
	"BEGIN"
	"{"
	"\tbegin = timestamp;"
	"}"
	":::cpu-change-speed"
	"/last[(processorid_t)arg0] != 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint32_t)(arg1/1000000);"
	"\t@times[this->cpu, this->oldspeed] = "
	"sum(timestamp - last[this->cpu]);"
	"\tlast[this->cpu] = timestamp;"
	"}"
	":::cpu-change-speed"
	"/last[(processorid_t)arg0] == 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint32_t)(arg1/1000000);"
	"\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
	"\tlast[this->cpu] = timestamp;"
	"}";
91*b47b5b34SRafael Vanoni 
/*
 * Variant of the program above restricted to a single CPU: only firings
 * whose arg0 equals macro argument $1 are recorded, so one scalar "last"
 * timestamp is enough.
 */
static const char dtp_cpufreq_c[] =
	"hrtime_t last;"
	"BEGIN"
	"{"
	"\tbegin = timestamp;"
	"}"
	":::cpu-change-speed"
	"/(processorid_t)arg0 == $1 &&"
	" last != 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint32_t)(arg1/1000000);"
	"\t@times[this->cpu, this->oldspeed] = sum(timestamp - last);"
	"\tlast = timestamp;"
	"}"
	":::cpu-change-speed"
	"/(processorid_t)arg0 == $1 &&"
	" last == 0/"
	"{"
	"\tthis->cpu = (processorid_t)arg0;"
	"\tthis->oldspeed = (uint32_t)(arg1/1000000);"
	"\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
	"\tlast = timestamp;"
	"}";
121*b47b5b34SRafael Vanoni 
122*b47b5b34SRafael Vanoni static int	pt_cpufreq_setup(void);
123*b47b5b34SRafael Vanoni static int	pt_cpufreq_snapshot(void);
124*b47b5b34SRafael Vanoni static int	pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *);
125*b47b5b34SRafael Vanoni static void	pt_cpufreq_stat_account(double, uint_t);
126*b47b5b34SRafael Vanoni static int	pt_cpufreq_snapshot_cpu(kstat_ctl_t *,
127*b47b5b34SRafael Vanoni     uint_t);
128*b47b5b34SRafael Vanoni 
129*b47b5b34SRafael Vanoni static int
130*b47b5b34SRafael Vanoni pt_cpufreq_setup(void)
131*b47b5b34SRafael Vanoni {
132*b47b5b34SRafael Vanoni 	if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL)
133*b47b5b34SRafael Vanoni 		return (EXIT_FAILURE);
134*b47b5b34SRafael Vanoni 
135*b47b5b34SRafael Vanoni 	if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) {
136*b47b5b34SRafael Vanoni 		free(dtp_argv);
137*b47b5b34SRafael Vanoni 		return (EXIT_FAILURE);
138*b47b5b34SRafael Vanoni 	}
139*b47b5b34SRafael Vanoni 
140*b47b5b34SRafael Vanoni 	(void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed);
141*b47b5b34SRafael Vanoni 
142*b47b5b34SRafael Vanoni 	if (PTOP_ON_CPU) {
143*b47b5b34SRafael Vanoni 		if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH))
144*b47b5b34SRafael Vanoni 		    == NULL) {
145*b47b5b34SRafael Vanoni 			free(dtp_argv[0]);
146*b47b5b34SRafael Vanoni 			free(dtp_argv);
147*b47b5b34SRafael Vanoni 			return (EXIT_FAILURE);
148*b47b5b34SRafael Vanoni 		}
149*b47b5b34SRafael Vanoni 		(void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu);
150*b47b5b34SRafael Vanoni 	}
151*b47b5b34SRafael Vanoni 
152*b47b5b34SRafael Vanoni 	return (0);
153*b47b5b34SRafael Vanoni }
154*b47b5b34SRafael Vanoni 
155*b47b5b34SRafael Vanoni /*
156*b47b5b34SRafael Vanoni  * Perform setup necessary to enumerate and track CPU speed changes
157*b47b5b34SRafael Vanoni  */
158*b47b5b34SRafael Vanoni int
159*b47b5b34SRafael Vanoni pt_cpufreq_stat_prepare(void)
160*b47b5b34SRafael Vanoni {
161*b47b5b34SRafael Vanoni 	dtrace_prog_t 		*prog;
162*b47b5b34SRafael Vanoni 	dtrace_proginfo_t 	info;
163*b47b5b34SRafael Vanoni 	dtrace_optval_t 	statustime;
164*b47b5b34SRafael Vanoni 	kstat_ctl_t 		*kc;
165*b47b5b34SRafael Vanoni 	kstat_t 		*ksp;
166*b47b5b34SRafael Vanoni 	kstat_named_t 		*knp;
167*b47b5b34SRafael Vanoni 	freq_state_info_t 	*state;
168*b47b5b34SRafael Vanoni 	char 			*s, *token, *prog_ptr;
169*b47b5b34SRafael Vanoni 	int 			err;
170*b47b5b34SRafael Vanoni 
171*b47b5b34SRafael Vanoni 	if ((err = pt_cpufreq_setup()) != 0) {
172*b47b5b34SRafael Vanoni 		pt_error("%s : failed to setup", __FILE__);
173*b47b5b34SRafael Vanoni 		return (errno);
174*b47b5b34SRafael Vanoni 	}
175*b47b5b34SRafael Vanoni 
176*b47b5b34SRafael Vanoni 	state = g_pstate_info;
177*b47b5b34SRafael Vanoni 	if ((g_cpu_power_states = calloc((size_t)g_ncpus,
178*b47b5b34SRafael Vanoni 	    sizeof (cpu_power_info_t))) == NULL)
179*b47b5b34SRafael Vanoni 		return (-1);
180*b47b5b34SRafael Vanoni 
181*b47b5b34SRafael Vanoni 	/*
182*b47b5b34SRafael Vanoni 	 * Enumerate the CPU frequencies
183*b47b5b34SRafael Vanoni 	 */
184*b47b5b34SRafael Vanoni 	if ((kc = kstat_open()) == NULL)
185*b47b5b34SRafael Vanoni 		return (errno);
186*b47b5b34SRafael Vanoni 
187*b47b5b34SRafael Vanoni 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL);
188*b47b5b34SRafael Vanoni 
189*b47b5b34SRafael Vanoni 	if (ksp == NULL) {
190*b47b5b34SRafael Vanoni 		err = errno;
191*b47b5b34SRafael Vanoni 		(void) kstat_close(kc);
192*b47b5b34SRafael Vanoni 		return (err);
193*b47b5b34SRafael Vanoni 	}
194*b47b5b34SRafael Vanoni 
195*b47b5b34SRafael Vanoni 	(void) kstat_read(kc, ksp, NULL);
196*b47b5b34SRafael Vanoni 
197*b47b5b34SRafael Vanoni 	knp = kstat_data_lookup(ksp, "supported_frequencies_Hz");
198*b47b5b34SRafael Vanoni 	s = knp->value.str.addr.ptr;
199*b47b5b34SRafael Vanoni 
200*b47b5b34SRafael Vanoni 	g_npstates = 0;
201*b47b5b34SRafael Vanoni 
202*b47b5b34SRafael Vanoni 	for (token = strtok(s, ":"), s = NULL;
203*b47b5b34SRafael Vanoni 	    NULL != token && g_npstates < NSTATES;
204*b47b5b34SRafael Vanoni 	    token = strtok(NULL, ":")) {
205*b47b5b34SRafael Vanoni 
206*b47b5b34SRafael Vanoni 		state->speed = HZ2MHZ(atoll(token));
207*b47b5b34SRafael Vanoni 
208*b47b5b34SRafael Vanoni 		if (state->speed > max_cpufreq)
209*b47b5b34SRafael Vanoni 			max_cpufreq = state->speed;
210*b47b5b34SRafael Vanoni 
211*b47b5b34SRafael Vanoni 		state->total_time = (uint64_t)0;
212*b47b5b34SRafael Vanoni 
213*b47b5b34SRafael Vanoni 		g_npstates++;
214*b47b5b34SRafael Vanoni 		state++;
215*b47b5b34SRafael Vanoni 	}
216*b47b5b34SRafael Vanoni 
217*b47b5b34SRafael Vanoni 	if (token != NULL)
218*b47b5b34SRafael Vanoni 		pt_error("%s : exceeded NSTATES\n", __FILE__);
219*b47b5b34SRafael Vanoni 
220*b47b5b34SRafael Vanoni 	(void) kstat_close(kc);
221*b47b5b34SRafael Vanoni 
222*b47b5b34SRafael Vanoni 	/*
223*b47b5b34SRafael Vanoni 	 * Return if speed transition is not supported
224*b47b5b34SRafael Vanoni 	 */
225*b47b5b34SRafael Vanoni 	if (g_npstates < 2)
226*b47b5b34SRafael Vanoni 		return (-1);
227*b47b5b34SRafael Vanoni 
228*b47b5b34SRafael Vanoni 	/*
229*b47b5b34SRafael Vanoni 	 * Setup DTrace to look for CPU frequency changes
230*b47b5b34SRafael Vanoni 	 */
231*b47b5b34SRafael Vanoni 	if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
232*b47b5b34SRafael Vanoni 		pt_error("%s : cannot open dtrace library: %s\n", __FILE__,
233*b47b5b34SRafael Vanoni 		    dtrace_errmsg(NULL, err));
234*b47b5b34SRafael Vanoni 		return (-2);
235*b47b5b34SRafael Vanoni 	}
236*b47b5b34SRafael Vanoni 
237*b47b5b34SRafael Vanoni 	/*
238*b47b5b34SRafael Vanoni 	 * Execute different scripts (defined above) depending on
239*b47b5b34SRafael Vanoni 	 * user specified options. Default mode uses dtp_cpufreq.
240*b47b5b34SRafael Vanoni 	 */
241*b47b5b34SRafael Vanoni 	if (PTOP_ON_CPU)
242*b47b5b34SRafael Vanoni 		prog_ptr = (char *)dtp_cpufreq_c;
243*b47b5b34SRafael Vanoni 	else
244*b47b5b34SRafael Vanoni 		prog_ptr = (char *)dtp_cpufreq;
245*b47b5b34SRafael Vanoni 
246*b47b5b34SRafael Vanoni 	if ((prog = dtrace_program_strcompile(dtp, prog_ptr,
247*b47b5b34SRafael Vanoni 	    DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) {
248*b47b5b34SRafael Vanoni 		pt_error("%s : cpu-change-speed probe unavailable\n", __FILE__);
249*b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
250*b47b5b34SRafael Vanoni 	}
251*b47b5b34SRafael Vanoni 
252*b47b5b34SRafael Vanoni 	if (dtrace_program_exec(dtp, prog, &info) == -1) {
253*b47b5b34SRafael Vanoni 		pt_error("%s : failed to enable speed probe\n", __FILE__);
254*b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
255*b47b5b34SRafael Vanoni 	}
256*b47b5b34SRafael Vanoni 
257*b47b5b34SRafael Vanoni 	if (dtrace_setopt(dtp, "aggsize", "128k") == -1) {
258*b47b5b34SRafael Vanoni 		pt_error("%s : failed to set speed 'aggsize'\n", __FILE__);
259*b47b5b34SRafael Vanoni 	}
260*b47b5b34SRafael Vanoni 
261*b47b5b34SRafael Vanoni 	if (dtrace_setopt(dtp, "aggrate", "0") == -1) {
262*b47b5b34SRafael Vanoni 		pt_error("%s : failed to set speed 'aggrate'\n", __FILE__);
263*b47b5b34SRafael Vanoni 	}
264*b47b5b34SRafael Vanoni 
265*b47b5b34SRafael Vanoni 	if (dtrace_setopt(dtp, "aggpercpu", 0) == -1) {
266*b47b5b34SRafael Vanoni 		pt_error("%s : failed to set speed 'aggpercpu'\n", __FILE__);
267*b47b5b34SRafael Vanoni 	}
268*b47b5b34SRafael Vanoni 
269*b47b5b34SRafael Vanoni 	if (dtrace_go(dtp) != 0) {
270*b47b5b34SRafael Vanoni 		pt_error("%s : failed to start speed observation", __FILE__);
271*b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
272*b47b5b34SRafael Vanoni 	}
273*b47b5b34SRafael Vanoni 
274*b47b5b34SRafael Vanoni 	if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) {
275*b47b5b34SRafael Vanoni 		pt_error("%s : failed to get speed 'statusrate'\n", __FILE__);
276*b47b5b34SRafael Vanoni 		return (dtrace_errno(dtp));
277*b47b5b34SRafael Vanoni 	}
278*b47b5b34SRafael Vanoni 
279*b47b5b34SRafael Vanoni 	return (0);
280*b47b5b34SRafael Vanoni }
281*b47b5b34SRafael Vanoni 
282*b47b5b34SRafael Vanoni /*
283*b47b5b34SRafael Vanoni  * The DTrace probes have already been enabled, and are tracking
284*b47b5b34SRafael Vanoni  * CPU speed transitions. Take a snapshot of the aggregations, and
285*b47b5b34SRafael Vanoni  * look for any CPUs that have made a speed transition over the last
286*b47b5b34SRafael Vanoni  * sampling interval. Note that the aggregations may be empty if no
287*b47b5b34SRafael Vanoni  * speed transitions took place over the last interval. In that case,
288*b47b5b34SRafael Vanoni  * notate that we have already accounted for the time, so that when
289*b47b5b34SRafael Vanoni  * we do encounter a speed transition in a future sampling interval
290*b47b5b34SRafael Vanoni  * we can subtract that time back out.
291*b47b5b34SRafael Vanoni  */
292*b47b5b34SRafael Vanoni int
293*b47b5b34SRafael Vanoni pt_cpufreq_stat_collect(double interval)
294*b47b5b34SRafael Vanoni {
295*b47b5b34SRafael Vanoni 	int	i, ret;
296*b47b5b34SRafael Vanoni 
297*b47b5b34SRafael Vanoni 	/*
298*b47b5b34SRafael Vanoni 	 * Zero out the interval time reported by DTrace for
299*b47b5b34SRafael Vanoni 	 * this interval
300*b47b5b34SRafael Vanoni 	 */
301*b47b5b34SRafael Vanoni 	for (i = 0; i < g_npstates; i++)
302*b47b5b34SRafael Vanoni 		g_pstate_info[i].total_time = 0;
303*b47b5b34SRafael Vanoni 
304*b47b5b34SRafael Vanoni 	for (i = 0; i < g_ncpus; i++)
305*b47b5b34SRafael Vanoni 		g_cpu_power_states[i].dtrace_time = 0;
306*b47b5b34SRafael Vanoni 
307*b47b5b34SRafael Vanoni 	if (dtrace_status(dtp) == -1)
308*b47b5b34SRafael Vanoni 		return (-1);
309*b47b5b34SRafael Vanoni 
310*b47b5b34SRafael Vanoni 	if (dtrace_aggregate_snap(dtp) != 0)
311*b47b5b34SRafael Vanoni 		pt_error("%s : failed to add to stats aggregation", __FILE__);
312*b47b5b34SRafael Vanoni 
313*b47b5b34SRafael Vanoni 	if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk,
314*b47b5b34SRafael Vanoni 	    NULL) != 0)
315*b47b5b34SRafael Vanoni 		pt_error("%s : failed to sort stats aggregation", __FILE__);
316*b47b5b34SRafael Vanoni 
317*b47b5b34SRafael Vanoni 	dtrace_aggregate_clear(dtp);
318*b47b5b34SRafael Vanoni 
319*b47b5b34SRafael Vanoni 	if ((ret = pt_cpufreq_snapshot()) != 0) {
320*b47b5b34SRafael Vanoni 		pt_error("%s : failed to add to stats aggregation", __FILE__);
321*b47b5b34SRafael Vanoni 		return (ret);
322*b47b5b34SRafael Vanoni 	}
323*b47b5b34SRafael Vanoni 
324*b47b5b34SRafael Vanoni 	switch (g_op_mode) {
325*b47b5b34SRafael Vanoni 	case PTOP_MODE_CPU:
326*b47b5b34SRafael Vanoni 		pt_cpufreq_stat_account(interval, g_observed_cpu);
327*b47b5b34SRafael Vanoni 		break;
328*b47b5b34SRafael Vanoni 	case PTOP_MODE_DEFAULT:
329*b47b5b34SRafael Vanoni 	default:
330*b47b5b34SRafael Vanoni 		for (i = 0; i < g_ncpus_observed; i++)
331*b47b5b34SRafael Vanoni 			pt_cpufreq_stat_account(interval, i);
332*b47b5b34SRafael Vanoni 		break;
333*b47b5b34SRafael Vanoni 	}
334*b47b5b34SRafael Vanoni 
335*b47b5b34SRafael Vanoni 	return (0);
336*b47b5b34SRafael Vanoni }
337*b47b5b34SRafael Vanoni 
338*b47b5b34SRafael Vanoni static void
339*b47b5b34SRafael Vanoni pt_cpufreq_stat_account(double interval, uint_t cpu)
340*b47b5b34SRafael Vanoni {
341*b47b5b34SRafael Vanoni 	uint64_t 		speed;
342*b47b5b34SRafael Vanoni 	hrtime_t 		duration;
343*b47b5b34SRafael Vanoni 	cpu_power_info_t 	*cpu_pow;
344*b47b5b34SRafael Vanoni 	int			i;
345*b47b5b34SRafael Vanoni 
346*b47b5b34SRafael Vanoni 	cpu_pow = &g_cpu_power_states[cpu];
347*b47b5b34SRafael Vanoni 	speed = cpu_pow->current_pstate;
348*b47b5b34SRafael Vanoni 
349*b47b5b34SRafael Vanoni 	duration = (hrtime_t)((interval * NANOSEC)) - cpu_pow->dtrace_time;
350*b47b5b34SRafael Vanoni 
351*b47b5b34SRafael Vanoni 	for (i = 0; i < g_npstates; i++) {
352*b47b5b34SRafael Vanoni 		if (g_pstate_info[i].speed == speed) {
353*b47b5b34SRafael Vanoni 			g_pstate_info[i].total_time += duration;
354*b47b5b34SRafael Vanoni 			cpu_pow->time_accounted += duration;
355*b47b5b34SRafael Vanoni 		}
356*b47b5b34SRafael Vanoni 	}
357*b47b5b34SRafael Vanoni }
358*b47b5b34SRafael Vanoni 
359*b47b5b34SRafael Vanoni /*
360*b47b5b34SRafael Vanoni  * Take a snapshot of each CPU's speed by looking through the cpu_info kstats.
361*b47b5b34SRafael Vanoni  */
362*b47b5b34SRafael Vanoni static int
363*b47b5b34SRafael Vanoni pt_cpufreq_snapshot(void)
364*b47b5b34SRafael Vanoni {
365*b47b5b34SRafael Vanoni 	kstat_ctl_t 		*kc;
366*b47b5b34SRafael Vanoni 	int 			ret;
367*b47b5b34SRafael Vanoni 	uint_t			i;
368*b47b5b34SRafael Vanoni 
369*b47b5b34SRafael Vanoni 	if ((kc = kstat_open()) == NULL)
370*b47b5b34SRafael Vanoni 		return (errno);
371*b47b5b34SRafael Vanoni 
372*b47b5b34SRafael Vanoni 	switch (g_op_mode) {
373*b47b5b34SRafael Vanoni 	case PTOP_MODE_CPU:
374*b47b5b34SRafael Vanoni 		ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu);
375*b47b5b34SRafael Vanoni 		break;
376*b47b5b34SRafael Vanoni 	case PTOP_MODE_DEFAULT:
377*b47b5b34SRafael Vanoni 	default:
378*b47b5b34SRafael Vanoni 		for (i = 0; i < g_ncpus_observed; i++)
379*b47b5b34SRafael Vanoni 			if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0)
380*b47b5b34SRafael Vanoni 				break;
381*b47b5b34SRafael Vanoni 		break;
382*b47b5b34SRafael Vanoni 	}
383*b47b5b34SRafael Vanoni 
384*b47b5b34SRafael Vanoni 	if (kstat_close(kc) != 0)
385*b47b5b34SRafael Vanoni 		pt_error("%s : couldn't close kstat\n", __FILE__);
386*b47b5b34SRafael Vanoni 
387*b47b5b34SRafael Vanoni 	return (ret);
388*b47b5b34SRafael Vanoni }
389*b47b5b34SRafael Vanoni 
390*b47b5b34SRafael Vanoni static int
391*b47b5b34SRafael Vanoni pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu)
392*b47b5b34SRafael Vanoni {
393*b47b5b34SRafael Vanoni 	kstat_t 		*ksp;
394*b47b5b34SRafael Vanoni 	kstat_named_t 		*knp;
395*b47b5b34SRafael Vanoni 
396*b47b5b34SRafael Vanoni 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL);
397*b47b5b34SRafael Vanoni 	if (ksp == NULL) {
398*b47b5b34SRafael Vanoni 		pt_error("%s : couldn't find cpu_info kstat for CPU "
399*b47b5b34SRafael Vanoni 		"%d\n", __FILE__, cpu);
400*b47b5b34SRafael Vanoni 		return (1);
401*b47b5b34SRafael Vanoni 	}
402*b47b5b34SRafael Vanoni 
403*b47b5b34SRafael Vanoni 	if (kstat_read(kc, ksp, NULL) == -1) {
404*b47b5b34SRafael Vanoni 		pt_error("%s : couldn't read cpu_info kstat for "
405*b47b5b34SRafael Vanoni 		    "CPU %d\n", __FILE__, cpu);
406*b47b5b34SRafael Vanoni 		return (2);
407*b47b5b34SRafael Vanoni 	}
408*b47b5b34SRafael Vanoni 
409*b47b5b34SRafael Vanoni 	knp = kstat_data_lookup(ksp, "current_clock_Hz");
410*b47b5b34SRafael Vanoni 	if (knp == NULL) {
411*b47b5b34SRafael Vanoni 		pt_error("%s : couldn't find current_clock_Hz "
412*b47b5b34SRafael Vanoni 		    "kstat for CPU %d\n", __FILE__, cpu);
413*b47b5b34SRafael Vanoni 		return (3);
414*b47b5b34SRafael Vanoni 	}
415*b47b5b34SRafael Vanoni 
416*b47b5b34SRafael Vanoni 	g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64);
417*b47b5b34SRafael Vanoni 
418*b47b5b34SRafael Vanoni 	return (0);
419*b47b5b34SRafael Vanoni }
420*b47b5b34SRafael Vanoni 
421*b47b5b34SRafael Vanoni /*
422*b47b5b34SRafael Vanoni  * DTrace aggregation walker that sorts through a snapshot of the
423*b47b5b34SRafael Vanoni  * aggregation data collected during firings of the cpu-change-speed
424*b47b5b34SRafael Vanoni  * probe.
425*b47b5b34SRafael Vanoni  */
426*b47b5b34SRafael Vanoni /*ARGSUSED*/
427*b47b5b34SRafael Vanoni static int
428*b47b5b34SRafael Vanoni pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
429*b47b5b34SRafael Vanoni {
430*b47b5b34SRafael Vanoni 	dtrace_aggdesc_t 	*aggdesc = data->dtada_desc;
431*b47b5b34SRafael Vanoni 	dtrace_recdesc_t 	*cpu_rec, *speed_rec;
432*b47b5b34SRafael Vanoni 	cpu_power_info_t 	*cpu_pow;
433*b47b5b34SRafael Vanoni 	int32_t 		cpu;
434*b47b5b34SRafael Vanoni 	uint64_t 		speed;
435*b47b5b34SRafael Vanoni 	hrtime_t 		dt_state_time = 0;
436*b47b5b34SRafael Vanoni 	int 			i;
437*b47b5b34SRafael Vanoni 
438*b47b5b34SRafael Vanoni 	if (strcmp(aggdesc->dtagd_name, "times") == 0) {
439*b47b5b34SRafael Vanoni 		cpu_rec = &aggdesc->dtagd_rec[1];
440*b47b5b34SRafael Vanoni 		speed_rec = &aggdesc->dtagd_rec[2];
441*b47b5b34SRafael Vanoni 
442*b47b5b34SRafael Vanoni 		for (i = 0; i < g_ncpus; i++) {
443*b47b5b34SRafael Vanoni 			/* LINTED - alignment */
444*b47b5b34SRafael Vanoni 			dt_state_time += *((hrtime_t *)(data->dtada_percpu[i]));
445*b47b5b34SRafael Vanoni 		}
446*b47b5b34SRafael Vanoni 
447*b47b5b34SRafael Vanoni 		/* LINTED - alignment */
448*b47b5b34SRafael Vanoni 		cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset);
449*b47b5b34SRafael Vanoni 		/* LINTED - alignment */
450*b47b5b34SRafael Vanoni 		speed = *(uint64_t *)(data->dtada_data +
451*b47b5b34SRafael Vanoni 		    speed_rec->dtrd_offset);
452*b47b5b34SRafael Vanoni 
453*b47b5b34SRafael Vanoni 		if (speed == 0) {
454*b47b5b34SRafael Vanoni 			speed = max_cpufreq;
455*b47b5b34SRafael Vanoni 		}
456*b47b5b34SRafael Vanoni 
457*b47b5b34SRafael Vanoni 		/*
458*b47b5b34SRafael Vanoni 		 * We have an aggregation record for "cpu" being at "speed"
459*b47b5b34SRafael Vanoni 		 * for an interval of "n" nanoseconds. The reported interval
460*b47b5b34SRafael Vanoni 		 * may exceed the powertop sampling interval, since we only
461*b47b5b34SRafael Vanoni 		 * notice during potentially infrequent firings of the
462*b47b5b34SRafael Vanoni 		 * "speed change" DTrace probe. In this case powertop would
463*b47b5b34SRafael Vanoni 		 * have already accounted for the portions of the interval
464*b47b5b34SRafael Vanoni 		 * that happened during prior powertop samplings, so subtract
465*b47b5b34SRafael Vanoni 		 * out time already accounted.
466*b47b5b34SRafael Vanoni 		 */
467*b47b5b34SRafael Vanoni 		cpu_pow = &g_cpu_power_states[cpu];
468*b47b5b34SRafael Vanoni 
469*b47b5b34SRafael Vanoni 		for (i = 0; i < g_npstates; i++) {
470*b47b5b34SRafael Vanoni 			if (g_pstate_info[i].speed == speed) {
471*b47b5b34SRafael Vanoni 				if (cpu_pow->time_accounted > 0) {
472*b47b5b34SRafael Vanoni 					if (dt_state_time == 0)
473*b47b5b34SRafael Vanoni 						continue;
474*b47b5b34SRafael Vanoni 					if (dt_state_time >
475*b47b5b34SRafael Vanoni 					    cpu_pow->time_accounted) {
476*b47b5b34SRafael Vanoni 						dt_state_time -=
477*b47b5b34SRafael Vanoni 						    cpu_pow->time_accounted;
478*b47b5b34SRafael Vanoni 						cpu_pow->time_accounted = 0;
479*b47b5b34SRafael Vanoni 					}
480*b47b5b34SRafael Vanoni 				}
481*b47b5b34SRafael Vanoni 				g_pstate_info[i].total_time += dt_state_time;
482*b47b5b34SRafael Vanoni 				cpu_pow->dtrace_time += dt_state_time;
483*b47b5b34SRafael Vanoni 			}
484*b47b5b34SRafael Vanoni 		}
485*b47b5b34SRafael Vanoni 	}
486*b47b5b34SRafael Vanoni 	return (DTRACE_AGGWALK_NEXT);
487*b47b5b34SRafael Vanoni }
488*b47b5b34SRafael Vanoni 
489*b47b5b34SRafael Vanoni /*
490*b47b5b34SRafael Vanoni  * Used as a suggestion, sets PM in /etc/power.conf and
491*b47b5b34SRafael Vanoni  * a 1sec threshold, then calls /usr/sbin/pmconfig
492*b47b5b34SRafael Vanoni  */
493*b47b5b34SRafael Vanoni void
494*b47b5b34SRafael Vanoni enable_p_state(void)
495*b47b5b34SRafael Vanoni {
496*b47b5b34SRafael Vanoni 	(void) system(cpupm_enable);
497*b47b5b34SRafael Vanoni 	(void) system(cpupm_treshold);
498*b47b5b34SRafael Vanoni 	(void) system(default_pmconf);
499*b47b5b34SRafael Vanoni }
500*b47b5b34SRafael Vanoni 
501*b47b5b34SRafael Vanoni /*
502*b47b5b34SRafael Vanoni  * Checks if PM is enabled in /etc/power.conf, enabling if not
503*b47b5b34SRafael Vanoni  */
504*b47b5b34SRafael Vanoni void
505*b47b5b34SRafael Vanoni suggest_p_state(void)
506*b47b5b34SRafael Vanoni {
507*b47b5b34SRafael Vanoni 	char 	line[1024];
508*b47b5b34SRafael Vanoni 	FILE 	*file;
509*b47b5b34SRafael Vanoni 
510*b47b5b34SRafael Vanoni 	/*
511*b47b5b34SRafael Vanoni 	 * Return if speed transition is not supported
512*b47b5b34SRafael Vanoni 	 */
513*b47b5b34SRafael Vanoni 	if (g_npstates < 2)
514*b47b5b34SRafael Vanoni 		return;
515*b47b5b34SRafael Vanoni 
516*b47b5b34SRafael Vanoni 	file = fopen(default_conf, "r");
517*b47b5b34SRafael Vanoni 
518*b47b5b34SRafael Vanoni 	if (!file)
519*b47b5b34SRafael Vanoni 		return;
520*b47b5b34SRafael Vanoni 
521*b47b5b34SRafael Vanoni 	(void) memset(line, 0, 1024);
522*b47b5b34SRafael Vanoni 
523*b47b5b34SRafael Vanoni 	while (fgets(line, 1023, file)) {
524*b47b5b34SRafael Vanoni 		if (strstr(line, "cpupm")) {
525*b47b5b34SRafael Vanoni 			if (strstr(line, "enable")) {
526*b47b5b34SRafael Vanoni 				(void) fclose(file);
527*b47b5b34SRafael Vanoni 				return;
528*b47b5b34SRafael Vanoni 			}
529*b47b5b34SRafael Vanoni 		}
530*b47b5b34SRafael Vanoni 	}
531*b47b5b34SRafael Vanoni 
532*b47b5b34SRafael Vanoni 	add_suggestion("Suggestion: enable CPU power management by "
533*b47b5b34SRafael Vanoni 	    "pressing the P key",  40, 'P', "P - Enable p-state",
534*b47b5b34SRafael Vanoni 	    enable_p_state);
535*b47b5b34SRafael Vanoni 
536*b47b5b34SRafael Vanoni 	(void) fclose(file);
537*b47b5b34SRafael Vanoni }
538