xref: /freebsd/sys/i386/i386/perfmon.c (revision fdafd315ad0d0f28a11b9fb4476a9ab059c62b92)
1 /*-
2  * Copyright 1996 Massachusetts Institute of Technology
3  *
4  * Permission to use, copy, modify, and distribute this software and
5  * its documentation for any purpose and without fee is hereby
6  * granted, provided that both the above copyright notice and this
7  * permission notice appear in all copies, that both the above
8  * copyright notice and this permission notice appear in all
9  * supporting documentation, and that the name of M.I.T. not be used
10  * in advertising or publicity pertaining to distribution of the
11  * software without specific, written prior permission.  M.I.T. makes
12  * no representations about the suitability of this software for any
13  * purpose.  It is provided "as is" without express or implied
14  * warranty.
15  *
16  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/fcntl.h>
34 #include <sys/kernel.h>
35 
36 #ifndef SMP
37 #include <machine/cputypes.h>
38 #endif
39 #include <machine/clock.h>
40 #include <machine/perfmon.h>
41 #include <machine/specialreg.h>
42 
43 static int perfmon_inuse;
44 static int perfmon_cpuok;
45 #ifndef SMP
46 static int msr_ctl[NPMC];
47 #endif
48 static int msr_pmc[NPMC];
49 static unsigned int ctl_shadow[NPMC];
50 static quad_t pmc_shadow[NPMC];	/* used when ctr is stopped on P5 */
51 static int (*writectl)(int);
52 #ifndef SMP
53 static int writectl5(int);
54 static int writectl6(int);
55 #endif
56 
57 static d_close_t perfmon_close;
58 static d_open_t	perfmon_open;
59 static d_ioctl_t perfmon_ioctl;
60 
/*
 * XXX perfmon_init_dev(void *) was split out of perfmon_init() to solve a
 * problem for DEVFS users: the "perfmon" driver has to be loaded after the
 * DEVFS subsystem has been started.  Registering it with SI_ORDER_ANY makes
 * it the lowest-priority task of its subsystem, which guarantees that
 * ordering.
 */
static void perfmon_init_dev(void *);
SYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL);
70 
71 static struct cdevsw perfmon_cdevsw = {
72 	.d_version =	D_VERSION,
73 	.d_flags =	D_NEEDGIANT,
74 	.d_open =	perfmon_open,
75 	.d_close =	perfmon_close,
76 	.d_ioctl =	perfmon_ioctl,
77 	.d_name =	"perfmon",
78 };
79 
80 /*
81  * Must be called after cpu_class is set up.
82  */
83 void
perfmon_init(void)84 perfmon_init(void)
85 {
86 #ifndef SMP
87 	switch(cpu_class) {
88 	case CPUCLASS_586:
89 		perfmon_cpuok = 1;
90 		msr_ctl[0] = MSR_P5_CESR;
91 		msr_ctl[1] = MSR_P5_CESR;
92 		msr_pmc[0] = MSR_P5_CTR0;
93 		msr_pmc[1] = MSR_P5_CTR1;
94 		writectl = writectl5;
95 		break;
96 	case CPUCLASS_686:
97 		perfmon_cpuok = 1;
98 		msr_ctl[0] = MSR_EVNTSEL0;
99 		msr_ctl[1] = MSR_EVNTSEL1;
100 		msr_pmc[0] = MSR_PERFCTR0;
101 		msr_pmc[1] = MSR_PERFCTR1;
102 		writectl = writectl6;
103 		break;
104 
105 	default:
106 		perfmon_cpuok = 0;
107 		break;
108 	}
109 #endif /* SMP */
110 }
111 
112 static void
perfmon_init_dev(void * dummy)113 perfmon_init_dev(void *dummy)
114 {
115 	make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon");
116 }
117 
118 int
perfmon_avail(void)119 perfmon_avail(void)
120 {
121 	return perfmon_cpuok;
122 }
123 
124 int
perfmon_setup(int pmc,unsigned int control)125 perfmon_setup(int pmc, unsigned int control)
126 {
127 	register_t	saveintr;
128 
129 	if (pmc < 0 || pmc >= NPMC)
130 		return EINVAL;
131 
132 	perfmon_inuse |= (1 << pmc);
133 	control &= ~(PMCF_SYS_FLAGS << 16);
134 	saveintr = intr_disable();
135 	ctl_shadow[pmc] = control;
136 	writectl(pmc);
137 	wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
138 	intr_restore(saveintr);
139 	return 0;
140 }
141 
142 int
perfmon_get(int pmc,unsigned int * control)143 perfmon_get(int pmc, unsigned int *control)
144 {
145 	if (pmc < 0 || pmc >= NPMC)
146 		return EINVAL;
147 
148 	if (perfmon_inuse & (1 << pmc)) {
149 		*control = ctl_shadow[pmc];
150 		return 0;
151 	}
152 	return EBUSY;		/* XXX reversed sense */
153 }
154 
155 int
perfmon_fini(int pmc)156 perfmon_fini(int pmc)
157 {
158 	if (pmc < 0 || pmc >= NPMC)
159 		return EINVAL;
160 
161 	if (perfmon_inuse & (1 << pmc)) {
162 		perfmon_stop(pmc);
163 		ctl_shadow[pmc] = 0;
164 		perfmon_inuse &= ~(1 << pmc);
165 		return 0;
166 	}
167 	return EBUSY;		/* XXX reversed sense */
168 }
169 
170 int
perfmon_start(int pmc)171 perfmon_start(int pmc)
172 {
173 	register_t	saveintr;
174 
175 	if (pmc < 0 || pmc >= NPMC)
176 		return EINVAL;
177 
178 	if (perfmon_inuse & (1 << pmc)) {
179 		saveintr = intr_disable();
180 		ctl_shadow[pmc] |= (PMCF_EN << 16);
181 		wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
182 		writectl(pmc);
183 		intr_restore(saveintr);
184 		return 0;
185 	}
186 	return EBUSY;
187 }
188 
189 int
perfmon_stop(int pmc)190 perfmon_stop(int pmc)
191 {
192 	register_t	saveintr;
193 
194 	if (pmc < 0 || pmc >= NPMC)
195 		return EINVAL;
196 
197 	if (perfmon_inuse & (1 << pmc)) {
198 		saveintr = intr_disable();
199 		pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
200 		ctl_shadow[pmc] &= ~(PMCF_EN << 16);
201 		writectl(pmc);
202 		intr_restore(saveintr);
203 		return 0;
204 	}
205 	return EBUSY;
206 }
207 
208 int
perfmon_read(int pmc,quad_t * val)209 perfmon_read(int pmc, quad_t *val)
210 {
211 	if (pmc < 0 || pmc >= NPMC)
212 		return EINVAL;
213 
214 	if (perfmon_inuse & (1 << pmc)) {
215 		if (ctl_shadow[pmc] & (PMCF_EN << 16))
216 			*val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
217 		else
218 			*val = pmc_shadow[pmc];
219 		return 0;
220 	}
221 
222 	return EBUSY;
223 }
224 
225 int
perfmon_reset(int pmc)226 perfmon_reset(int pmc)
227 {
228 	if (pmc < 0 || pmc >= NPMC)
229 		return EINVAL;
230 
231 	if (perfmon_inuse & (1 << pmc)) {
232 		wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
233 		return 0;
234 	}
235 	return EBUSY;
236 }
237 
238 #ifndef SMP
239 /*
240  * Unfortunately, the performance-monitoring registers are laid out
241  * differently in the P5 and P6.  We keep everything in P6 format
242  * internally (except for the event code), and convert to P5
243  * format as needed on those CPUs.  The writectl function pointer
244  * is set up to point to one of these functions by perfmon_init().
245  */
246 int
writectl6(int pmc)247 writectl6(int pmc)
248 {
249 	if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) {
250 		wrmsr(msr_ctl[pmc], 0);
251 	} else {
252 		wrmsr(msr_ctl[pmc], ctl_shadow[pmc]);
253 	}
254 	return 0;
255 }
256 
257 #define	P5FLAG_P	0x200
258 #define	P5FLAG_E	0x100
259 #define	P5FLAG_USR	0x80
260 #define	P5FLAG_OS	0x40
261 
262 int
writectl5(int pmc)263 writectl5(int pmc)
264 {
265 	quad_t newval = 0;
266 
267 	if (ctl_shadow[1] & (PMCF_EN << 16)) {
268 		if (ctl_shadow[1] & (PMCF_USR << 16))
269 			newval |= P5FLAG_USR << 16;
270 		if (ctl_shadow[1] & (PMCF_OS << 16))
271 			newval |= P5FLAG_OS << 16;
272 		if (!(ctl_shadow[1] & (PMCF_E << 16)))
273 			newval |= P5FLAG_E << 16;
274 		newval |= (ctl_shadow[1] & 0x3f) << 16;
275 	}
276 	if (ctl_shadow[0] & (PMCF_EN << 16)) {
277 		if (ctl_shadow[0] & (PMCF_USR << 16))
278 			newval |= P5FLAG_USR;
279 		if (ctl_shadow[0] & (PMCF_OS << 16))
280 			newval |= P5FLAG_OS;
281 		if (!(ctl_shadow[0] & (PMCF_E << 16)))
282 			newval |= P5FLAG_E;
283 		newval |= ctl_shadow[0] & 0x3f;
284 	}
285 
286 	wrmsr(msr_ctl[0], newval);
287 	return 0;		/* XXX should check for unimplemented bits */
288 }
289 #endif /* !SMP */
290 
/*
 * Now the user-mode interface: the open/close/ioctl entry points of the
 * "perfmon" character device.  (An earlier comment described these as being
 * called from a subdevice of mem.c.)
 */
/* Non-zero while the device is held open for writing. */
static int writer;
/* Bitmask of the counters set up through PMIOSETUP by the current writer. */
static int writerpmc;
296 
297 static int
perfmon_open(struct cdev * dev,int flags,int fmt,struct thread * td)298 perfmon_open(struct cdev *dev, int flags, int fmt, struct thread *td)
299 {
300 	if (!perfmon_cpuok)
301 		return ENXIO;
302 
303 	if (flags & FWRITE) {
304 		if (writer) {
305 			return EBUSY;
306 		} else {
307 			writer = 1;
308 			writerpmc = 0;
309 		}
310 	}
311 	return 0;
312 }
313 
314 static int
perfmon_close(struct cdev * dev,int flags,int fmt,struct thread * td)315 perfmon_close(struct cdev *dev, int flags, int fmt, struct thread *td)
316 {
317 	if (flags & FWRITE) {
318 		int i;
319 
320 		for (i = 0; i < NPMC; i++) {
321 			if (writerpmc & (1 << i))
322 				perfmon_fini(i);
323 		}
324 		writer = 0;
325 	}
326 	return 0;
327 }
328 
329 static int
perfmon_ioctl(struct cdev * dev,u_long cmd,caddr_t param,int flags,struct thread * td)330 perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_t param, int flags, struct thread *td)
331 {
332 	struct pmc *pmc;
333 	struct pmc_data *pmcd;
334 	struct pmc_tstamp *pmct;
335 	uint64_t freq;
336 	int *ip;
337 	int rv;
338 
339 	switch(cmd) {
340 	case PMIOSETUP:
341 		if (!(flags & FWRITE))
342 			return EPERM;
343 		pmc = (struct pmc *)param;
344 
345 		rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val);
346 		if (!rv) {
347 			writerpmc |= (1 << pmc->pmc_num);
348 		}
349 		break;
350 
351 	case PMIOGET:
352 		pmc = (struct pmc *)param;
353 		rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val);
354 		break;
355 
356 	case PMIOSTART:
357 		if (!(flags & FWRITE))
358 			return EPERM;
359 
360 		ip = (int *)param;
361 		rv = perfmon_start(*ip);
362 		break;
363 
364 	case PMIOSTOP:
365 		if (!(flags & FWRITE))
366 			return EPERM;
367 
368 		ip = (int *)param;
369 		rv = perfmon_stop(*ip);
370 		break;
371 
372 	case PMIORESET:
373 		if (!(flags & FWRITE))
374 			return EPERM;
375 
376 		ip = (int *)param;
377 		rv = perfmon_reset(*ip);
378 		break;
379 
380 	case PMIOREAD:
381 		pmcd = (struct pmc_data *)param;
382 		rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value);
383 		break;
384 
385 	case PMIOTSTAMP:
386 		freq = atomic_load_acq_64(&tsc_freq);
387 		if (freq == 0) {
388 			rv = ENOTTY;
389 			break;
390 		}
391 		pmct = (struct pmc_tstamp *)param;
392 		/* XXX interface loses precision. */
393 		pmct->pmct_rate = freq / 1000000;
394 		pmct->pmct_value = rdtsc();
395 		rv = 0;
396 		break;
397 	default:
398 		rv = ENOTTY;
399 	}
400 
401 	return rv;
402 }
403