1 /*-
2 * Copyright 1996 Massachusetts Institute of Technology
3 *
4 * Permission to use, copy, modify, and distribute this software and
5 * its documentation for any purpose and without fee is hereby
6 * granted, provided that both the above copyright notice and this
7 * permission notice appear in all copies, that both the above
8 * copyright notice and this permission notice appear in all
9 * supporting documentation, and that the name of M.I.T. not be used
10 * in advertising or publicity pertaining to distribution of the
11 * software without specific, written prior permission. M.I.T. makes
12 * no representations about the suitability of this software for any
13 * purpose. It is provided "as is" without express or implied
14 * warranty.
15 *
16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/conf.h>
33 #include <sys/fcntl.h>
34 #include <sys/kernel.h>
35
36 #ifndef SMP
37 #include <machine/cputypes.h>
38 #endif
39 #include <machine/clock.h>
40 #include <machine/perfmon.h>
41 #include <machine/specialreg.h>
42
43 static int perfmon_inuse;
44 static int perfmon_cpuok;
45 #ifndef SMP
46 static int msr_ctl[NPMC];
47 #endif
48 static int msr_pmc[NPMC];
49 static unsigned int ctl_shadow[NPMC];
50 static quad_t pmc_shadow[NPMC]; /* used when ctr is stopped on P5 */
51 static int (*writectl)(int);
52 #ifndef SMP
53 static int writectl5(int);
54 static int writectl6(int);
55 #endif
56
57 static d_close_t perfmon_close;
58 static d_open_t perfmon_open;
59 static d_ioctl_t perfmon_ioctl;
60
/*
 * XXX perfmon_init_dev(void *) was split out of the perfmon_init() function.
 * This solves a problem for DEVFS users: it loads the "perfmon" driver only
 * after the DEVFS subsystem has been kicked into action.  SI_ORDER_ANY
 * ensures that it runs as the lowest-priority task, which guarantees the
 * above.
 */
/* Device-node creation hook, registered to run during driver startup. */
static void	perfmon_init_dev(void *dummy);
SYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL);
70
71 static struct cdevsw perfmon_cdevsw = {
72 .d_version = D_VERSION,
73 .d_flags = D_NEEDGIANT,
74 .d_open = perfmon_open,
75 .d_close = perfmon_close,
76 .d_ioctl = perfmon_ioctl,
77 .d_name = "perfmon",
78 };
79
80 /*
81 * Must be called after cpu_class is set up.
82 */
83 void
perfmon_init(void)84 perfmon_init(void)
85 {
86 #ifndef SMP
87 switch(cpu_class) {
88 case CPUCLASS_586:
89 perfmon_cpuok = 1;
90 msr_ctl[0] = MSR_P5_CESR;
91 msr_ctl[1] = MSR_P5_CESR;
92 msr_pmc[0] = MSR_P5_CTR0;
93 msr_pmc[1] = MSR_P5_CTR1;
94 writectl = writectl5;
95 break;
96 case CPUCLASS_686:
97 perfmon_cpuok = 1;
98 msr_ctl[0] = MSR_EVNTSEL0;
99 msr_ctl[1] = MSR_EVNTSEL1;
100 msr_pmc[0] = MSR_PERFCTR0;
101 msr_pmc[1] = MSR_PERFCTR1;
102 writectl = writectl6;
103 break;
104
105 default:
106 perfmon_cpuok = 0;
107 break;
108 }
109 #endif /* SMP */
110 }
111
112 static void
perfmon_init_dev(void * dummy)113 perfmon_init_dev(void *dummy)
114 {
115 make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon");
116 }
117
118 int
perfmon_avail(void)119 perfmon_avail(void)
120 {
121 return perfmon_cpuok;
122 }
123
124 int
perfmon_setup(int pmc,unsigned int control)125 perfmon_setup(int pmc, unsigned int control)
126 {
127 register_t saveintr;
128
129 if (pmc < 0 || pmc >= NPMC)
130 return EINVAL;
131
132 perfmon_inuse |= (1 << pmc);
133 control &= ~(PMCF_SYS_FLAGS << 16);
134 saveintr = intr_disable();
135 ctl_shadow[pmc] = control;
136 writectl(pmc);
137 wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
138 intr_restore(saveintr);
139 return 0;
140 }
141
142 int
perfmon_get(int pmc,unsigned int * control)143 perfmon_get(int pmc, unsigned int *control)
144 {
145 if (pmc < 0 || pmc >= NPMC)
146 return EINVAL;
147
148 if (perfmon_inuse & (1 << pmc)) {
149 *control = ctl_shadow[pmc];
150 return 0;
151 }
152 return EBUSY; /* XXX reversed sense */
153 }
154
155 int
perfmon_fini(int pmc)156 perfmon_fini(int pmc)
157 {
158 if (pmc < 0 || pmc >= NPMC)
159 return EINVAL;
160
161 if (perfmon_inuse & (1 << pmc)) {
162 perfmon_stop(pmc);
163 ctl_shadow[pmc] = 0;
164 perfmon_inuse &= ~(1 << pmc);
165 return 0;
166 }
167 return EBUSY; /* XXX reversed sense */
168 }
169
170 int
perfmon_start(int pmc)171 perfmon_start(int pmc)
172 {
173 register_t saveintr;
174
175 if (pmc < 0 || pmc >= NPMC)
176 return EINVAL;
177
178 if (perfmon_inuse & (1 << pmc)) {
179 saveintr = intr_disable();
180 ctl_shadow[pmc] |= (PMCF_EN << 16);
181 wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
182 writectl(pmc);
183 intr_restore(saveintr);
184 return 0;
185 }
186 return EBUSY;
187 }
188
189 int
perfmon_stop(int pmc)190 perfmon_stop(int pmc)
191 {
192 register_t saveintr;
193
194 if (pmc < 0 || pmc >= NPMC)
195 return EINVAL;
196
197 if (perfmon_inuse & (1 << pmc)) {
198 saveintr = intr_disable();
199 pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
200 ctl_shadow[pmc] &= ~(PMCF_EN << 16);
201 writectl(pmc);
202 intr_restore(saveintr);
203 return 0;
204 }
205 return EBUSY;
206 }
207
208 int
perfmon_read(int pmc,quad_t * val)209 perfmon_read(int pmc, quad_t *val)
210 {
211 if (pmc < 0 || pmc >= NPMC)
212 return EINVAL;
213
214 if (perfmon_inuse & (1 << pmc)) {
215 if (ctl_shadow[pmc] & (PMCF_EN << 16))
216 *val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
217 else
218 *val = pmc_shadow[pmc];
219 return 0;
220 }
221
222 return EBUSY;
223 }
224
225 int
perfmon_reset(int pmc)226 perfmon_reset(int pmc)
227 {
228 if (pmc < 0 || pmc >= NPMC)
229 return EINVAL;
230
231 if (perfmon_inuse & (1 << pmc)) {
232 wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
233 return 0;
234 }
235 return EBUSY;
236 }
237
238 #ifndef SMP
239 /*
240 * Unfortunately, the performance-monitoring registers are laid out
241 * differently in the P5 and P6. We keep everything in P6 format
242 * internally (except for the event code), and convert to P5
243 * format as needed on those CPUs. The writectl function pointer
244 * is set up to point to one of these functions by perfmon_init().
245 */
246 int
writectl6(int pmc)247 writectl6(int pmc)
248 {
249 if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) {
250 wrmsr(msr_ctl[pmc], 0);
251 } else {
252 wrmsr(msr_ctl[pmc], ctl_shadow[pmc]);
253 }
254 return 0;
255 }
256
257 #define P5FLAG_P 0x200
258 #define P5FLAG_E 0x100
259 #define P5FLAG_USR 0x80
260 #define P5FLAG_OS 0x40
261
262 int
writectl5(int pmc)263 writectl5(int pmc)
264 {
265 quad_t newval = 0;
266
267 if (ctl_shadow[1] & (PMCF_EN << 16)) {
268 if (ctl_shadow[1] & (PMCF_USR << 16))
269 newval |= P5FLAG_USR << 16;
270 if (ctl_shadow[1] & (PMCF_OS << 16))
271 newval |= P5FLAG_OS << 16;
272 if (!(ctl_shadow[1] & (PMCF_E << 16)))
273 newval |= P5FLAG_E << 16;
274 newval |= (ctl_shadow[1] & 0x3f) << 16;
275 }
276 if (ctl_shadow[0] & (PMCF_EN << 16)) {
277 if (ctl_shadow[0] & (PMCF_USR << 16))
278 newval |= P5FLAG_USR;
279 if (ctl_shadow[0] & (PMCF_OS << 16))
280 newval |= P5FLAG_OS;
281 if (!(ctl_shadow[0] & (PMCF_E << 16)))
282 newval |= P5FLAG_E;
283 newval |= ctl_shadow[0] & 0x3f;
284 }
285
286 wrmsr(msr_ctl[0], newval);
287 return 0; /* XXX should check for unimplemented bits */
288 }
289 #endif /* !SMP */
290
/*
 * Now the user-mode interface: the open/close/ioctl entry points wired into
 * perfmon_cdevsw.  (The original note says these are called from a
 * subdevice of mem.c.)
 *
 * writer is nonzero while the device is held open for writing; writerpmc is
 * a bitmask of the counters that holder set up with PMIOSETUP, so that
 * close can release them.
 */
static int writer;
static int writerpmc;
296
297 static int
perfmon_open(struct cdev * dev,int flags,int fmt,struct thread * td)298 perfmon_open(struct cdev *dev, int flags, int fmt, struct thread *td)
299 {
300 if (!perfmon_cpuok)
301 return ENXIO;
302
303 if (flags & FWRITE) {
304 if (writer) {
305 return EBUSY;
306 } else {
307 writer = 1;
308 writerpmc = 0;
309 }
310 }
311 return 0;
312 }
313
314 static int
perfmon_close(struct cdev * dev,int flags,int fmt,struct thread * td)315 perfmon_close(struct cdev *dev, int flags, int fmt, struct thread *td)
316 {
317 if (flags & FWRITE) {
318 int i;
319
320 for (i = 0; i < NPMC; i++) {
321 if (writerpmc & (1 << i))
322 perfmon_fini(i);
323 }
324 writer = 0;
325 }
326 return 0;
327 }
328
329 static int
perfmon_ioctl(struct cdev * dev,u_long cmd,caddr_t param,int flags,struct thread * td)330 perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_t param, int flags, struct thread *td)
331 {
332 struct pmc *pmc;
333 struct pmc_data *pmcd;
334 struct pmc_tstamp *pmct;
335 uint64_t freq;
336 int *ip;
337 int rv;
338
339 switch(cmd) {
340 case PMIOSETUP:
341 if (!(flags & FWRITE))
342 return EPERM;
343 pmc = (struct pmc *)param;
344
345 rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val);
346 if (!rv) {
347 writerpmc |= (1 << pmc->pmc_num);
348 }
349 break;
350
351 case PMIOGET:
352 pmc = (struct pmc *)param;
353 rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val);
354 break;
355
356 case PMIOSTART:
357 if (!(flags & FWRITE))
358 return EPERM;
359
360 ip = (int *)param;
361 rv = perfmon_start(*ip);
362 break;
363
364 case PMIOSTOP:
365 if (!(flags & FWRITE))
366 return EPERM;
367
368 ip = (int *)param;
369 rv = perfmon_stop(*ip);
370 break;
371
372 case PMIORESET:
373 if (!(flags & FWRITE))
374 return EPERM;
375
376 ip = (int *)param;
377 rv = perfmon_reset(*ip);
378 break;
379
380 case PMIOREAD:
381 pmcd = (struct pmc_data *)param;
382 rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value);
383 break;
384
385 case PMIOTSTAMP:
386 freq = atomic_load_acq_64(&tsc_freq);
387 if (freq == 0) {
388 rv = ENOTTY;
389 break;
390 }
391 pmct = (struct pmc_tstamp *)param;
392 /* XXX interface loses precision. */
393 pmct->pmct_rate = freq / 1000000;
394 pmct->pmct_value = rdtsc();
395 rv = 0;
396 break;
397 default:
398 rv = ENOTTY;
399 }
400
401 return rv;
402 }
403