1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2003-2008 Joseph Koshy
5 * Copyright (c) 2007 The FreeBSD Foundation
6 * All rights reserved.
7 *
8 * Portions of this software were developed by A. Joseph Koshy under
9 * sponsorship from the FreeBSD Foundation and Google, Inc.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 #include "opt_hwpmc_hooks.h"
35
36 #include <sys/param.h>
37 #include <sys/ctype.h>
38 #include <sys/domainset.h>
39 #include <sys/param.h>
40 #include <sys/malloc.h>
41 #include <sys/kernel.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/pmc.h>
45 #include <sys/pmckern.h>
46 #include <sys/smp.h>
47 #include <sys/sysctl.h>
48 #include <sys/systm.h>
49
50 #include <vm/vm.h>
51 #include <vm/vm_extern.h>
52 #include <vm/vm_kern.h>
53
54 #ifdef HWPMC_HOOKS
55 FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
56 #define PMC_KERNEL_VERSION PMC_VERSION
57 #else
58 #define PMC_KERNEL_VERSION 0
59 #endif
60
61 MALLOC_DECLARE(M_PMCHOOKS);
62 MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");
63
64 /* memory pool */
65 MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");
66
67 const int pmc_kernel_version = PMC_KERNEL_VERSION;
68
69 /* Hook variable. */
70 int __read_mostly (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;
71
72 /* Interrupt handler */
73 int __read_mostly (*pmc_intr)(struct trapframe *tf) = NULL;
74
75 DPCPU_DEFINE(uint8_t, pmc_sampled);
76
77 /*
78 * A global count of SS mode PMCs. When non-zero, this means that
79 * we have processes that are sampling the system as a whole.
80 */
81 volatile int pmc_ss_count;
82
83 /*
84 * Since PMC(4) may not be loaded in the current kernel, the
85 * convention followed is that a non-NULL value of 'pmc_hook' implies
86 * the presence of this kernel module.
87 *
88 * This requires us to protect 'pmc_hook' with a
89 * shared (sx) lock -- thus making the process of calling into PMC(4)
90 * somewhat more expensive than a simple 'if' check and indirect call.
91 */
92 struct sx pmc_sx;
93 SX_SYSINIT(pmcsx, &pmc_sx, "pmc-sx");
94
95 /*
96 * PMC Soft per cpu trapframe.
97 */
98 struct trapframe pmc_tf[MAXCPU];
99
100 /*
101 * Per domain list of buffer headers
102 */
103 __read_mostly struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
104
105 /*
106 * PMC Soft use a global table to store registered events.
107 */
108
109 SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
110 "HWPMC parameters");
111
112 static int pmc_softevents = 16;
113 SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN,
114 &pmc_softevents, 0, "maximum number of soft events");
115
116 int pmc_softs_count;
117 struct pmc_soft **pmc_softs;
118
119 struct mtx pmc_softs_mtx;
120 MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);
121
122 /*
123 * Helper functions.
124 */
125
126 /*
127 * A note on the CPU numbering scheme used by the hwpmc(4) driver.
128 *
129 * CPUs are denoted using numbers in the range 0..[pmc_cpu_max()-1].
130 * CPUs could be numbered "sparsely" in this range; the predicate
131 * `pmc_cpu_is_present()' is used to test whether a given CPU is
132 * physically present.
133 *
134 * Further, a CPU that is physically present may be administratively
135 * disabled or otherwise unavailable for use by hwpmc(4). The
136 * `pmc_cpu_is_active()' predicate tests for CPU usability. An
137 * "active" CPU participates in thread scheduling and can field
138 * interrupts raised by PMC hardware.
139 *
140 * On systems with hyperthreaded CPUs, multiple logical CPUs may share
141 * PMC hardware resources. For such processors one logical CPU is
142 * denoted as the primary owner of the in-CPU PMC resources. The
143 * pmc_cpu_is_primary() predicate is used to distinguish this primary
144 * CPU from the others.
145 */
146
/*
 * Test whether 'cpu' is usable by hwpmc(4).
 *
 * On SMP kernels a CPU is active when pmc_cpu_is_present() accepts it
 * and it is not a member of hlt_cpus_mask.  Non-SMP kernels always
 * answer 1.
 */
int
pmc_cpu_is_active(int cpu)
{
#ifndef SMP
	return (1);
#else
	if (!pmc_cpu_is_present(cpu))
		return (0);
	return (!CPU_ISSET(cpu, &hlt_cpus_mask));
#endif
}
157
/*
 * Deprecated.  Logical negation of pmc_cpu_is_active(): returns 1 when
 * 'cpu' is not active and 0 otherwise.
 */
int
pmc_cpu_is_disabled(int cpu)
{
	return (pmc_cpu_is_active(cpu) == 0);
}
164
/*
 * Test whether 'cpu' is present.
 *
 * SMP kernels defer to CPU_ABSENT(); non-SMP kernels always answer 1.
 */
int
pmc_cpu_is_present(int cpu)
{
#ifndef SMP
	return (1);
#else
	return (CPU_ABSENT(cpu) == 0);
#endif
}
174
/*
 * Test whether 'cpu' is the primary owner of its in-CPU PMC resources.
 *
 * On SMP kernels a CPU is primary when it is not a member of
 * logical_cpus_mask.  Non-SMP kernels always answer 1.
 */
int
pmc_cpu_is_primary(int cpu)
{
#ifndef SMP
	return (1);
#else
	return (CPU_ISSET(cpu, &logical_cpus_mask) == 0);
#endif
}
184
/*
 * Return one past the highest CPU id (mp_maxid + 1 on SMP kernels,
 * 1 otherwise).  The value is used for scaling internal data
 * structures and for runtime checks.
 */
unsigned int
pmc_cpu_max(void)
{
	unsigned int limit;

#ifdef SMP
	limit = mp_maxid + 1;
#else
	limit = 1;
#endif
	return (limit);
}
199
200 #ifdef INVARIANTS
201
/*
 * Return the number of CPUs in the `active' state in the system.
 *
 * SMP kernels currently report mp_ncpus.  Once CPU hot-plugging is
 * supported by the kernel this function would change to report the
 * current number of "active" CPUs instead.
 */
int
pmc_cpu_max_active(void)
{
	int count;

#ifdef SMP
	count = mp_ncpus;
#else
	count = 1;
#endif
	return (count);
}
219
220 #endif
221
/*
 * Normalize an event name in place:
 *  - leading and trailing '_' characters are dropped,
 *  - runs of '_' collapse into a single '_',
 *  - every remaining character is converted with toupper().
 *
 * 'name' must be a writable NUL-terminated string; the result is never
 * longer than the input.
 */
static void
pmc_soft_namecleanup(char *name)
{
	const char *src;
	char *dst;

	/* Skip any leading underscores. */
	src = name;
	while (*src == '_')
		src++;

	dst = name;
	while (*src != '\0') {
		/*
		 * An underscore survives only when it is followed by a
		 * character that is neither '_' nor the terminator.
		 */
		if (*src != '_' || (src[1] != '_' && src[1] != '\0'))
			*dst++ = toupper(*src);
		src++;
	}
	*dst = '\0';
}
244
245 void
pmc_soft_ev_register(struct pmc_soft * ps)246 pmc_soft_ev_register(struct pmc_soft *ps)
247 {
248 static int warned = 0;
249 int n;
250
251 ps->ps_running = 0;
252 ps->ps_ev.pm_ev_code = 0; /* invalid */
253 pmc_soft_namecleanup(ps->ps_ev.pm_ev_name);
254
255 mtx_lock_spin(&pmc_softs_mtx);
256
257 if (pmc_softs_count >= pmc_softevents) {
258 /*
259 * XXX Reusing events can enter a race condition where
260 * new allocated event will be used as an old one.
261 */
262 for (n = 0; n < pmc_softevents; n++)
263 if (pmc_softs[n] == NULL)
264 break;
265 if (n == pmc_softevents) {
266 mtx_unlock_spin(&pmc_softs_mtx);
267 if (!warned) {
268 printf("hwpmc: too many soft events, "
269 "increase kern.hwpmc.softevents tunable\n");
270 warned = 1;
271 }
272 return;
273 }
274
275 ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + n;
276 pmc_softs[n] = ps;
277 } else {
278 ps->ps_ev.pm_ev_code = PMC_EV_SOFT_FIRST + pmc_softs_count;
279 pmc_softs[pmc_softs_count++] = ps;
280 }
281
282 mtx_unlock_spin(&pmc_softs_mtx);
283 }
284
285 void
pmc_soft_ev_deregister(struct pmc_soft * ps)286 pmc_soft_ev_deregister(struct pmc_soft *ps)
287 {
288
289 KASSERT(ps != NULL, ("pmc_soft_deregister: called with NULL"));
290
291 mtx_lock_spin(&pmc_softs_mtx);
292
293 if (ps->ps_ev.pm_ev_code != 0 &&
294 (ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST) < pmc_softevents) {
295 KASSERT((int)ps->ps_ev.pm_ev_code >= PMC_EV_SOFT_FIRST &&
296 (int)ps->ps_ev.pm_ev_code <= PMC_EV_SOFT_LAST,
297 ("pmc_soft_deregister: invalid event value"));
298 pmc_softs[ps->ps_ev.pm_ev_code - PMC_EV_SOFT_FIRST] = NULL;
299 }
300
301 mtx_unlock_spin(&pmc_softs_mtx);
302 }
303
304 struct pmc_soft *
pmc_soft_ev_acquire(enum pmc_event ev)305 pmc_soft_ev_acquire(enum pmc_event ev)
306 {
307 struct pmc_soft *ps;
308
309 if (ev == 0 || (ev - PMC_EV_SOFT_FIRST) >= pmc_softevents)
310 return NULL;
311
312 KASSERT((int)ev >= PMC_EV_SOFT_FIRST &&
313 (int)ev <= PMC_EV_SOFT_LAST,
314 ("event out of range"));
315
316 mtx_lock_spin(&pmc_softs_mtx);
317
318 ps = pmc_softs[ev - PMC_EV_SOFT_FIRST];
319 if (ps == NULL)
320 mtx_unlock_spin(&pmc_softs_mtx);
321
322 return ps;
323 }
324
325 void
pmc_soft_ev_release(struct pmc_soft * ps)326 pmc_soft_ev_release(struct pmc_soft *ps)
327 {
328
329 mtx_unlock_spin(&pmc_softs_mtx);
330 }
331
332 /*
333 * Initialise hwpmc.
334 */
335 static void
init_hwpmc(void * dummy __unused)336 init_hwpmc(void *dummy __unused)
337 {
338 int domain, cpu;
339
340 if (pmc_softevents <= 0 ||
341 pmc_softevents > PMC_EV_DYN_COUNT) {
342 (void) printf("hwpmc: tunable \"softevents\"=%d out of "
343 "range.\n", pmc_softevents);
344 pmc_softevents = PMC_EV_DYN_COUNT;
345 }
346 pmc_softs = malloc(pmc_softevents * sizeof(*pmc_softs), M_PMCHOOKS,
347 M_WAITOK | M_ZERO);
348
349 for (domain = 0; domain < vm_ndomains; domain++) {
350 pmc_dom_hdrs[domain] = malloc_domainset(
351 sizeof(struct pmc_domain_buffer_header), M_PMC,
352 DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
353 mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
354 TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
355 }
356 CPU_FOREACH(cpu) {
357 domain = pcpu_find(cpu)->pc_domain;
358 KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for domain: %d", domain));
359 pmc_dom_hdrs[domain]->pdbh_ncpus++;
360 }
361
362 }
363
364 SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);
365