1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2003-2008 Joseph Koshy 5 * Copyright (c) 2007 The FreeBSD Foundation 6 * Copyright (c) 2018 Matthew Macy 7 * All rights reserved. 8 * 9 * Portions of this software were developed by A. Joseph Koshy under 10 * sponsorship from the FreeBSD Foundation and Google, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/eventhandler.h> 40 #include <sys/jail.h> 41 #include <sys/kernel.h> 42 #include <sys/kthread.h> 43 #include <sys/limits.h> 44 #include <sys/lock.h> 45 #include <sys/malloc.h> 46 #include <sys/module.h> 47 #include <sys/mount.h> 48 #include <sys/mutex.h> 49 #include <sys/pmc.h> 50 #include <sys/pmckern.h> 51 #include <sys/pmclog.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/queue.h> 55 #include <sys/resourcevar.h> 56 #include <sys/rwlock.h> 57 #include <sys/sched.h> 58 #include <sys/signalvar.h> 59 #include <sys/smp.h> 60 #include <sys/sx.h> 61 #include <sys/sysctl.h> 62 #include <sys/sysent.h> 63 #include <sys/systm.h> 64 #include <sys/vnode.h> 65 66 #include <sys/linker.h> /* needs to be after <sys/malloc.h> */ 67 68 #include <machine/atomic.h> 69 #include <machine/md_var.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_extern.h> 73 #include <vm/pmap.h> 74 #include <vm/vm_map.h> 75 #include <vm/vm_object.h> 76 77 #include "hwpmc_soft.h" 78 79 #ifdef NUMA 80 #define NDOMAINS vm_ndomains 81 #else 82 #define NDOMAINS 1 83 #define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags)) 84 #define free_domain(addr, type) free(addr, type) 85 #endif 86 87 /* 88 * Types 89 */ 90 91 enum pmc_flags { 92 PMC_FLAG_NONE = 0x00, /* do nothing */ 93 PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ 94 PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ 95 }; 96 97 /* 98 * The offset in sysent where the syscall is allocated. 
99 */ 100 101 static int pmc_syscall_num = NO_SYSCALL; 102 struct pmc_cpu **pmc_pcpu; /* per-cpu state */ 103 pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ 104 105 #define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)] 106 107 struct mtx_pool *pmc_mtxpool; 108 static int *pmc_pmcdisp; /* PMC row dispositions */ 109 110 #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) 111 #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) 112 #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) 113 114 #define PMC_MARK_ROW_FREE(R) do { \ 115 pmc_pmcdisp[(R)] = 0; \ 116 } while (0) 117 118 #define PMC_MARK_ROW_STANDALONE(R) do { \ 119 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ 120 __LINE__)); \ 121 atomic_add_int(&pmc_pmcdisp[(R)], -1); \ 122 KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()), \ 123 ("[pmc,%d] row disposition error", __LINE__)); \ 124 } while (0) 125 126 #define PMC_UNMARK_ROW_STANDALONE(R) do { \ 127 atomic_add_int(&pmc_pmcdisp[(R)], 1); \ 128 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ 129 __LINE__)); \ 130 } while (0) 131 132 #define PMC_MARK_ROW_THREAD(R) do { \ 133 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ 134 __LINE__)); \ 135 atomic_add_int(&pmc_pmcdisp[(R)], 1); \ 136 } while (0) 137 138 #define PMC_UNMARK_ROW_THREAD(R) do { \ 139 atomic_add_int(&pmc_pmcdisp[(R)], -1); \ 140 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ 141 __LINE__)); \ 142 } while (0) 143 144 145 /* various event handlers */ 146 static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag, 147 pmc_kld_unload_tag; 148 149 /* Module statistics */ 150 struct pmc_driverstats pmc_stats; 151 152 153 /* Machine/processor dependent operations */ 154 static struct pmc_mdep *md; 155 156 /* 157 * Hash tables mapping owner processes and target threads to PMCs. 
158 */ 159 160 struct mtx pmc_processhash_mtx; /* spin mutex */ 161 static u_long pmc_processhashmask; 162 static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; 163 164 /* 165 * Hash table of PMC owner descriptors. This table is protected by 166 * the shared PMC "sx" lock. 167 */ 168 169 static u_long pmc_ownerhashmask; 170 static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; 171 172 /* 173 * List of PMC owners with system-wide sampling PMCs. 174 */ 175 176 static LIST_HEAD(, pmc_owner) pmc_ss_owners; 177 178 179 /* 180 * A map of row indices to classdep structures. 181 */ 182 static struct pmc_classdep **pmc_rowindex_to_classdep; 183 184 /* 185 * Prototypes 186 */ 187 188 #ifdef HWPMC_DEBUG 189 static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); 190 static int pmc_debugflags_parse(char *newstr, char *fence); 191 #endif 192 193 static int load(struct module *module, int cmd, void *arg); 194 static int pmc_attach_process(struct proc *p, struct pmc *pm); 195 static struct pmc *pmc_allocate_pmc_descriptor(void); 196 static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); 197 static int pmc_attach_one_process(struct proc *p, struct pmc *pm); 198 static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, 199 int cpu); 200 static int pmc_can_attach(struct pmc *pm, struct proc *p); 201 static void pmc_capture_user_callchain(int cpu, int soft, struct trapframe *tf); 202 static void pmc_cleanup(void); 203 static int pmc_detach_process(struct proc *p, struct pmc *pm); 204 static int pmc_detach_one_process(struct proc *p, struct pmc *pm, 205 int flags); 206 static void pmc_destroy_owner_descriptor(struct pmc_owner *po); 207 static void pmc_destroy_pmc_descriptor(struct pmc *pm); 208 static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); 209 static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); 210 static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, 211 pmc_id_t pmc); 212 static 
struct pmc_process *pmc_find_process_descriptor(struct proc *p, 213 uint32_t mode); 214 static void pmc_force_context_switch(void); 215 static void pmc_link_target_process(struct pmc *pm, 216 struct pmc_process *pp); 217 static void pmc_log_all_process_mappings(struct pmc_owner *po); 218 static void pmc_log_kernel_mappings(struct pmc *pm); 219 static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); 220 static void pmc_maybe_remove_owner(struct pmc_owner *po); 221 static void pmc_process_csw_in(struct thread *td); 222 static void pmc_process_csw_out(struct thread *td); 223 static void pmc_process_exit(void *arg, struct proc *p); 224 static void pmc_process_fork(void *arg, struct proc *p1, 225 struct proc *p2, int n); 226 static void pmc_process_samples(int cpu, int soft); 227 static void pmc_release_pmc_descriptor(struct pmc *pmc); 228 static void pmc_remove_owner(struct pmc_owner *po); 229 static void pmc_remove_process_descriptor(struct pmc_process *pp); 230 static void pmc_restore_cpu_binding(struct pmc_binding *pb); 231 static void pmc_save_cpu_binding(struct pmc_binding *pb); 232 static void pmc_select_cpu(int cpu); 233 static int pmc_start(struct pmc *pm); 234 static int pmc_stop(struct pmc *pm); 235 static int pmc_syscall_handler(struct thread *td, void *syscall_args); 236 static void pmc_unlink_target_process(struct pmc *pmc, 237 struct pmc_process *pp); 238 static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); 239 static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); 240 static struct pmc_mdep *pmc_generic_cpu_initialize(void); 241 static void pmc_generic_cpu_finalize(struct pmc_mdep *md); 242 243 /* 244 * Kernel tunables and sysctl(8) interface. 245 */ 246 247 SYSCTL_DECL(_kern_hwpmc); 248 SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW, 0, "HWPMC stats"); 249 250 251 /* Stats. 
*/
/* Driver statistics counters, exported read/write under kern.hwpmc.stats. */
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW,
				   &pmc_stats.pm_intr_ignored, "# of interrupts ignored");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW,
				   &pmc_stats.pm_intr_processed, "# of interrupts processed");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW,
				   &pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW,
				   &pmc_stats.pm_syscalls, "# of syscalls");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW,
				   &pmc_stats.pm_syscall_errors, "# of syscall_errors");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW,
				   &pmc_stats.pm_buffer_requests, "# of buffer requests");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW,
				   &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed");
/* NOTE(review): the description string below is a placeholder ("# of ?"). */
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW,
				   &pmc_stats.pm_log_sweeps, "# of ?");

/*
 * kern.hwpmc.callchaindepth -- depth of call chain records.  Read-only
 * tunable (CTLFLAG_RDTUN), defaulting to PMC_CALLCHAIN_DEPTH.
 */
static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN,
    &pmc_callchaindepth, 0, "depth of call chain records");

/* kern.hwpmc.cpuid -- read-only CPU version string. */
char pmc_cpuid[64];
SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD,
	pmc_cpuid, 0, "cpu version string");
#ifdef	HWPMC_DEBUG
/*
 * Debug flags and their textual form.  The string is both a loader
 * tunable and a read/write sysctl (kern.hwpmc.debugflags) whose writes
 * are handled by pmc_debugflags_sysctl_handler().
 */
struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char	pmc_debugstr[PMC_DEBUG_STRSIZE];
TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr,
    sizeof(pmc_debugstr));
SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
    0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
#endif


/*
 * kern.hwpmc.hashsize -- determines the number of rows in the
 * driver's hash tables.
 */

static int pmc_hashsize = PMC_HASH_SIZE;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_RDTUN,
    &pmc_hashsize, 0, "rows in hash tables");

/*
 * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU
 */

static int pmc_nsamples = PMC_NSAMPLES;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN,
    &pmc_nsamples, 0, "number of PC samples per CPU");


/*
 * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
 */

static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_RDTUN,
    &pmc_mtxpool_size, 0, "size of spin mutex pool");


/*
 * security.bsd.unprivileged_syspmcs -- allow non-root processes to
 * allocate system-wide PMCs.
 *
 * Allowing unprivileged processes to allocate system PMCs is convenient
 * if system-wide measurements need to be taken concurrently with other
 * per-process measurements.  This feature is turned off by default.
 */

static int pmc_unprivileged_syspmcs = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RWTUN,
    &pmc_unprivileged_syspmcs, 0,
    "allow unprivileged process to allocate system PMCs");

/*
 * Hash function.  Discard the lower 2 bits of the pointer since
 * these are always zero for our uses.  The hash multiplier is
 * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
 */

#if	LONG_BIT == 64
#define	_PMC_HM		11400714819323198486u
#elif	LONG_BIT == 32
#define	_PMC_HM		2654435769u
#else
#error 	Must know the size of 'long' to compile
#endif

/*
 * Hash pointer P into a table whose index mask is M: drop the low two
 * bits, multiply by _PMC_HM and keep the bits selected by the mask.
 */
#define	PMC_HASH_PTR(P,M)	((((unsigned long) (P) >> 2) * _PMC_HM) & (M))

/*
 * Syscall structures
 */

/* The `sysent' for the new syscall */
static struct sysent pmc_sysent = {
	.sy_narg =	2,
	.sy_call =	pmc_syscall_handler,
};

/*
 * Glue handed to syscall_module_handler(): load() is chained as the
 * module event handler and the allocated syscall slot is written back
 * through pmc_syscall_num.
 */
static struct syscall_module_data pmc_syscall_mod = {
	.chainevh =	load,
	.chainarg =	NULL,
	.offset =	&pmc_syscall_num,
	.new_sysent =	&pmc_sysent,
	.old_sysent =	{ .sy_narg = 0, .sy_call = NULL },
	.flags =	SY_THR_STATIC_KLD,
};

static moduledata_t pmc_mod = {
	.name =		PMC_MODULE_NAME,
	.evhand =	syscall_module_handler,
	.priv =		&pmc_syscall_mod,
};

/* The initialization subsystem depends on whether EARLY_AP_STARTUP is set. */
#ifdef EARLY_AP_STARTUP
DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SYSCALLS, SI_ORDER_ANY);
#else
DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
#endif
MODULE_VERSION(pmc, PMC_VERSION);

#ifdef	HWPMC_DEBUG
/*
 * NOTE(review): this enum is not referenced by the parser below, at
 * least in the part of the file examined here -- confirm before removing.
 */
enum pmc_dbgparse_state {
	PMCDS_WS,		/* in whitespace */
	PMCDS_MAJOR,		/* seen a major keyword */
	PMCDS_MINOR
};

/*
 * Parse a debug-flags specification and install it in 'pmc_debugflags'.
 *
 * The text in [newstr, fence) is a whitespace-separated list of groups
 * of the form "major=minor[,minor...]".  Each major keyword selects one
 * 'pdb_*' field of struct pmc_debugflags; each minor keyword ORs one
 * bit (1 << PMC_DEBUG_MIN_*) into that field, and "*" sets every bit.
 * Parsing stops at 'fence' or at the first NUL byte.
 *
 * The new flag set is built in a zeroed temporary and copied over the
 * global only when the whole string parses, so a failed parse leaves
 * 'pmc_debugflags' untouched.
 *
 * Returns 0 on success, or EINVAL for a missing '=', an unknown major
 * keyword or an unknown minor keyword.
 */
static int
pmc_debugflags_parse(char *newstr, char *fence)
{
	char c, *p, *q;
	struct pmc_debugflags *tmpflags;
	int error, found, *newbits, tmp;
	size_t kwlen;

	tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO);

	p = newstr;
	error = 0;

	for (; p < fence && (c = *p); p++) {

		/* skip white space */
		if (c == ' ' || c == '\t')
			continue;

		/* look for a keyword followed by "=" */
		for (q = p; p < fence && (c = *p) && c != '='; p++)
			;
		if (c != '=') {
			error = EINVAL;
			goto done;
		}

		/* [q, p) is the major keyword; p points at the '='. */
		kwlen = p - q;
		newbits = NULL;

		/* lookup flag group name */
#define	DBG_SET_FLAG_MAJ(S,F)						\
		if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0)	\
			newbits = &tmpflags->pdb_ ## F;

		DBG_SET_FLAG_MAJ("cpu",		CPU);
		DBG_SET_FLAG_MAJ("csw",		CSW);
		DBG_SET_FLAG_MAJ("logging",	LOG);
		DBG_SET_FLAG_MAJ("module",	MOD);
		DBG_SET_FLAG_MAJ("md", 		MDP);
		DBG_SET_FLAG_MAJ("owner",	OWN);
		DBG_SET_FLAG_MAJ("pmc",		PMC);
		DBG_SET_FLAG_MAJ("process",	PRC);
		DBG_SET_FLAG_MAJ("sampling", 	SAM);

		if (newbits == NULL) {
			/* unrecognized major keyword */
			error = EINVAL;
			goto done;
		}

		p++;		/* skip the '=' */

		/* Now parse the individual flags */
		tmp = 0;
	newflag:
		/* Scan one minor keyword, ending at whitespace, ',' or NUL. */
		for (q = p; p < fence && (c = *p); p++)
			if (c == ' ' || c == '\t' || c == ',')
				break;

		/* p == fence or c == ws or c == "," or c == 0 */

		/* An empty keyword ends the group; commit what was gathered. */
		if ((kwlen = p - q) == 0) {
			*newbits = tmp;
			continue;
		}

		found = 0;
#define	DBG_SET_FLAG_MIN(S,F)						\
		if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0)	\
			tmp |= found = (1 << PMC_DEBUG_MIN_ ## F)

		/* a '*' denotes all possible flags in the group */
		if (kwlen == 1 && *q == '*')
			tmp = found = ~0;
		/* look for individual flag names */
		DBG_SET_FLAG_MIN("allocaterow", ALR);
		DBG_SET_FLAG_MIN("allocate",	ALL);
		DBG_SET_FLAG_MIN("attach",	ATT);
		DBG_SET_FLAG_MIN("bind",	BND);
		DBG_SET_FLAG_MIN("config",	CFG);
		DBG_SET_FLAG_MIN("exec",	EXC);
		DBG_SET_FLAG_MIN("exit",	EXT);
		DBG_SET_FLAG_MIN("find",	FND);
		DBG_SET_FLAG_MIN("flush",	FLS);
		DBG_SET_FLAG_MIN("fork",	FRK);
		DBG_SET_FLAG_MIN("getbuf",	GTB);
		DBG_SET_FLAG_MIN("hook",	PMH);
		DBG_SET_FLAG_MIN("init",	INI);
		DBG_SET_FLAG_MIN("intr",	INT);
		DBG_SET_FLAG_MIN("linktarget",	TLK);
		DBG_SET_FLAG_MIN("mayberemove", OMR);
		DBG_SET_FLAG_MIN("ops",		OPS);
		DBG_SET_FLAG_MIN("read",	REA);
		DBG_SET_FLAG_MIN("register",	REG);
		DBG_SET_FLAG_MIN("release",	REL);
		DBG_SET_FLAG_MIN("remove",	ORM);
		DBG_SET_FLAG_MIN("sample",	SAM);
		DBG_SET_FLAG_MIN("scheduleio",	SIO);
		DBG_SET_FLAG_MIN("select",	SEL);
		DBG_SET_FLAG_MIN("signal",	SIG);
		DBG_SET_FLAG_MIN("swi",		SWI);
		DBG_SET_FLAG_MIN("swo",		SWO);
		DBG_SET_FLAG_MIN("start",	STA);
		DBG_SET_FLAG_MIN("stop",	STO);
		DBG_SET_FLAG_MIN("syscall",	PMS);
		DBG_SET_FLAG_MIN("unlinktarget", TUL);
		DBG_SET_FLAG_MIN("write",	WRI);
		if (found == 0) {
			/* unrecognized flag name */
			error = EINVAL;
			goto done;
		}

		if (c == 0 || c == ' ' || c == '\t') {	/* end of flag group */
			*newbits = tmp;
			continue;
		}

		/* Step past the separator and parse the next minor keyword. */
		p++;
		goto newflag;
	}

	/* save the new flag set */
	bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags));

 done:
	free(tmpflags, M_PMC);
	return error;
}

/*
 * Sysctl handler for kern.hwpmc.debugflags.
 *
 * The current string is copied into a scratch buffer, which
 * sysctl_handle_string() uses to service the request.  If the request
 * carried a new value, the scratch buffer is parsed with
 * pmc_debugflags_parse(); 'pmc_debugstr' is updated only if parsing
 * succeeds.  Returns the error from sysctl_handle_string() or from the
 * parser, 0 on success.
 */
static int
pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	char *fence, *newstr;
	int error;
	unsigned int n;

	(void) arg1; (void) arg2; /* unused parameters */

	n = sizeof(pmc_debugstr);
	newstr = malloc(n, M_PMC, M_WAITOK|M_ZERO);
	(void) strlcpy(newstr, pmc_debugstr, n);

	error = sysctl_handle_string(oidp, newstr, n, req);

	/* if there is a new string, parse and copy it */
	if (error == 0 && req->newptr != NULL) {
		/*
		 * Bound the parse at the end of the scratch buffer, or one
		 * byte past the new value's length when that is shorter.
		 */
		fence = newstr + (n < req->newlen ? n : req->newlen + 1);
		if ((error = pmc_debugflags_parse(newstr, fence)) == 0)
			(void) strlcpy(pmc_debugstr, newstr,
			    sizeof(pmc_debugstr));
	}

	free(newstr, M_PMC);

	return error;
}
#endif

/*
 * Map a row index to a classdep structure and return the adjusted row
 * index for the PMC class index.
545 */ 546 static struct pmc_classdep * 547 pmc_ri_to_classdep(struct pmc_mdep *md, int ri, int *adjri) 548 { 549 struct pmc_classdep *pcd; 550 551 (void) md; 552 553 KASSERT(ri >= 0 && ri < md->pmd_npmc, 554 ("[pmc,%d] illegal row-index %d", __LINE__, ri)); 555 556 pcd = pmc_rowindex_to_classdep[ri]; 557 558 KASSERT(pcd != NULL, 559 ("[pmc,%d] ri %d null pcd", __LINE__, ri)); 560 561 *adjri = ri - pcd->pcd_ri; 562 563 KASSERT(*adjri >= 0 && *adjri < pcd->pcd_num, 564 ("[pmc,%d] adjusted row-index %d", __LINE__, *adjri)); 565 566 return (pcd); 567 } 568 569 /* 570 * Concurrency Control 571 * 572 * The driver manages the following data structures: 573 * 574 * - target process descriptors, one per target process 575 * - owner process descriptors (and attached lists), one per owner process 576 * - lookup hash tables for owner and target processes 577 * - PMC descriptors (and attached lists) 578 * - per-cpu hardware state 579 * - the 'hook' variable through which the kernel calls into 580 * this module 581 * - the machine hardware state (managed by the MD layer) 582 * 583 * These data structures are accessed from: 584 * 585 * - thread context-switch code 586 * - interrupt handlers (possibly on multiple cpus) 587 * - kernel threads on multiple cpus running on behalf of user 588 * processes doing system calls 589 * - this driver's private kernel threads 590 * 591 * = Locks and Locking strategy = 592 * 593 * The driver uses four locking strategies for its operation: 594 * 595 * - The global SX lock "pmc_sx" is used to protect internal 596 * data structures. 597 * 598 * Calls into the module by syscall() start with this lock being 599 * held in exclusive mode. Depending on the requested operation, 600 * the lock may be downgraded to 'shared' mode to allow more 601 * concurrent readers into the module. Calls into the module from 602 * other parts of the kernel acquire the lock in shared mode. 
603 * 604 * This SX lock is held in exclusive mode for any operations that 605 * modify the linkages between the driver's internal data structures. 606 * 607 * The 'pmc_hook' function pointer is also protected by this lock. 608 * It is only examined with the sx lock held in exclusive mode. The 609 * kernel module is allowed to be unloaded only with the sx lock held 610 * in exclusive mode. In normal syscall handling, after acquiring the 611 * pmc_sx lock we first check that 'pmc_hook' is non-null before 612 * proceeding. This prevents races between the thread unloading the module 613 * and other threads seeking to use the module. 614 * 615 * - Lookups of target process structures and owner process structures 616 * cannot use the global "pmc_sx" SX lock because these lookups need 617 * to happen during context switches and in other critical sections 618 * where sleeping is not allowed. We protect these lookup tables 619 * with their own private spin-mutexes, "pmc_processhash_mtx" and 620 * "pmc_ownerhash_mtx". 621 * 622 * - Interrupt handlers work in a lock free manner. At interrupt 623 * time, handlers look at the PMC pointer (phw->phw_pmc) configured 624 * when the PMC was started. If this pointer is NULL, the interrupt 625 * is ignored after updating driver statistics. We ensure that this 626 * pointer is set (using an atomic operation if necessary) before the 627 * PMC hardware is started. Conversely, this pointer is unset atomically 628 * only after the PMC hardware is stopped. 629 * 630 * We ensure that everything needed for the operation of an 631 * interrupt handler is available without it needing to acquire any 632 * locks. We also ensure that a PMC's software state is destroyed only 633 * after the PMC is taken off hardware (on all CPUs). 634 * 635 * - Context-switch handling with process-private PMCs needs more 636 * care. 637 * 638 * A given process may be the target of multiple PMCs. 
 *   For example, PMCATTACH and PMCDETACH may be requested by a process
 *   on one CPU while the target process is running on another.  A PMC
 *   could also be getting released because its owner is exiting.  We
 *   tackle these situations in the following manner:
 *
 *   - each target process structure 'pmc_process' has an array
 *     of 'struct pmc *' pointers, one for each hardware PMC.
 *
 *   - At context switch IN time, each "target" PMC in RUNNING state
 *     gets started on hardware and a pointer to each PMC is copied into
 *     the per-cpu phw array.  The 'runcount' for the PMC is
 *     incremented.
 *
 *   - At context switch OUT time, all process-virtual PMCs are stopped
 *     on hardware.  The saved value is added to the PMC's value field
 *     only if the PMC is in a non-deleted state (the PMC's state could
 *     have changed during the current time slice).
 *
 *     Note that in between a switch IN on a processor and the matching
 *     switch OUT, the PMC could have been released on another CPU.
 *     Therefore, context switch OUT always looks at the hardware state
 *     to turn OFF PMCs and will update a PMC's saved value only if it
 *     is reachable from the target process record.
 *
 *   - OP PMCRELEASE could be called on a PMC at any time (the PMC could
 *     be attached to many processes at the time of the call and could
 *     be active on multiple CPUs).
 *
 *     We prevent further scheduling of the PMC by marking it as in
 *     state 'DELETED'.  If the runcount of the PMC is non-zero then
 *     this PMC is currently running on a CPU somewhere.  The thread
 *     doing the PMCRELEASE operation waits by repeatedly doing a
 *     pause() till the runcount comes to zero.
 *
 * The contents of a PMC descriptor (struct pmc) are protected using
 * a spin-mutex.  In order to save space, we use a mutex pool.
 *
 * In terms of lock types used by witness(4), we use:
 *  - Type "pmc-sx", used by the global SX lock.
678 * - Type "pmc-sleep", for sleep mutexes used by logger threads. 679 * - Type "pmc-per-proc", for protecting PMC owner descriptors. 680 * - Type "pmc-leaf", used for all other spin mutexes. 681 */ 682 683 /* 684 * save the cpu binding of the current kthread 685 */ 686 687 static void 688 pmc_save_cpu_binding(struct pmc_binding *pb) 689 { 690 PMCDBG0(CPU,BND,2, "save-cpu"); 691 thread_lock(curthread); 692 pb->pb_bound = sched_is_bound(curthread); 693 pb->pb_cpu = curthread->td_oncpu; 694 thread_unlock(curthread); 695 PMCDBG1(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); 696 } 697 698 /* 699 * restore the cpu binding of the current thread 700 */ 701 702 static void 703 pmc_restore_cpu_binding(struct pmc_binding *pb) 704 { 705 PMCDBG2(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", 706 curthread->td_oncpu, pb->pb_cpu); 707 thread_lock(curthread); 708 if (pb->pb_bound) 709 sched_bind(curthread, pb->pb_cpu); 710 else 711 sched_unbind(curthread); 712 thread_unlock(curthread); 713 PMCDBG0(CPU,BND,2, "restore-cpu done"); 714 } 715 716 /* 717 * move execution over the specified cpu and bind it there. 718 */ 719 720 static void 721 pmc_select_cpu(int cpu) 722 { 723 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 724 ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); 725 726 /* Never move to an inactive CPU. */ 727 KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive " 728 "CPU %d", __LINE__, cpu)); 729 730 PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d", cpu); 731 thread_lock(curthread); 732 sched_bind(curthread, cpu); 733 thread_unlock(curthread); 734 735 KASSERT(curthread->td_oncpu == cpu, 736 ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, 737 cpu, curthread->td_oncpu)); 738 739 PMCDBG1(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); 740 } 741 742 /* 743 * Force a context switch. 744 * 745 * We do this by pause'ing for 1 tick -- invoking mi_switch() is not 746 * guaranteed to force a context switch. 
747 */ 748 749 static void 750 pmc_force_context_switch(void) 751 { 752 753 pause("pmcctx", 1); 754 } 755 756 /* 757 * Get the file name for an executable. This is a simple wrapper 758 * around vn_fullpath(9). 759 */ 760 761 static void 762 pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) 763 { 764 765 *fullpath = "unknown"; 766 *freepath = NULL; 767 vn_fullpath(curthread, v, fullpath, freepath); 768 } 769 770 /* 771 * remove an process owning PMCs 772 */ 773 774 void 775 pmc_remove_owner(struct pmc_owner *po) 776 { 777 struct pmc *pm, *tmp; 778 779 sx_assert(&pmc_sx, SX_XLOCKED); 780 781 PMCDBG1(OWN,ORM,1, "remove-owner po=%p", po); 782 783 /* Remove descriptor from the owner hash table */ 784 LIST_REMOVE(po, po_next); 785 786 /* release all owned PMC descriptors */ 787 LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { 788 PMCDBG1(OWN,ORM,2, "pmc=%p", pm); 789 KASSERT(pm->pm_owner == po, 790 ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); 791 792 pmc_release_pmc_descriptor(pm); /* will unlink from the list */ 793 pmc_destroy_pmc_descriptor(pm); 794 } 795 796 KASSERT(po->po_sscount == 0, 797 ("[pmc,%d] SS count not zero", __LINE__)); 798 KASSERT(LIST_EMPTY(&po->po_pmcs), 799 ("[pmc,%d] PMC list not empty", __LINE__)); 800 801 /* de-configure the log file if present */ 802 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 803 pmclog_deconfigure_log(po); 804 } 805 806 /* 807 * remove an owner process record if all conditions are met. 
808 */ 809 810 static void 811 pmc_maybe_remove_owner(struct pmc_owner *po) 812 { 813 814 PMCDBG1(OWN,OMR,1, "maybe-remove-owner po=%p", po); 815 816 /* 817 * Remove owner record if 818 * - this process does not own any PMCs 819 * - this process has not allocated a system-wide sampling buffer 820 */ 821 822 if (LIST_EMPTY(&po->po_pmcs) && 823 ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { 824 pmc_remove_owner(po); 825 pmc_destroy_owner_descriptor(po); 826 } 827 } 828 829 /* 830 * Add an association between a target process and a PMC. 831 */ 832 833 static void 834 pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) 835 { 836 int ri; 837 struct pmc_target *pt; 838 839 sx_assert(&pmc_sx, SX_XLOCKED); 840 841 KASSERT(pm != NULL && pp != NULL, 842 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); 843 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), 844 ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", 845 __LINE__, pm, pp->pp_proc->p_pid)); 846 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= ((int) md->pmd_npmc - 1), 847 ("[pmc,%d] Illegal reference count %d for process record %p", 848 __LINE__, pp->pp_refcnt, (void *) pp)); 849 850 ri = PMC_TO_ROWINDEX(pm); 851 852 PMCDBG3(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", 853 pm, ri, pp); 854 855 #ifdef HWPMC_DEBUG 856 LIST_FOREACH(pt, &pm->pm_targets, pt_next) 857 if (pt->pt_process == pp) 858 KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", 859 __LINE__, pp, pm)); 860 #endif 861 862 pt = malloc(sizeof(struct pmc_target), M_PMC, M_WAITOK|M_ZERO); 863 pt->pt_process = pp; 864 865 LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); 866 867 atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, 868 (uintptr_t)pm); 869 870 if (pm->pm_owner->po_owner == pp->pp_proc) 871 pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; 872 873 /* 874 * Initialize the per-process values at this row index. 875 */ 876 pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? 
877 pm->pm_sc.pm_reloadcount : 0; 878 879 pp->pp_refcnt++; 880 881 } 882 883 /* 884 * Removes the association between a target process and a PMC. 885 */ 886 887 static void 888 pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) 889 { 890 int ri; 891 struct proc *p; 892 struct pmc_target *ptgt; 893 894 sx_assert(&pmc_sx, SX_XLOCKED); 895 896 KASSERT(pm != NULL && pp != NULL, 897 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); 898 899 KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt <= (int) md->pmd_npmc, 900 ("[pmc,%d] Illegal ref count %d on process record %p", 901 __LINE__, pp->pp_refcnt, (void *) pp)); 902 903 ri = PMC_TO_ROWINDEX(pm); 904 905 PMCDBG3(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", 906 pm, ri, pp); 907 908 KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, 909 ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, 910 ri, pm, pp->pp_pmcs[ri].pp_pmc)); 911 912 pp->pp_pmcs[ri].pp_pmc = NULL; 913 pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0; 914 915 /* Remove owner-specific flags */ 916 if (pm->pm_owner->po_owner == pp->pp_proc) { 917 pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; 918 pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; 919 } 920 921 pp->pp_refcnt--; 922 923 /* Remove the target process from the PMC structure */ 924 LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) 925 if (ptgt->pt_process == pp) 926 break; 927 928 KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " 929 "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); 930 931 LIST_REMOVE(ptgt, pt_next); 932 free(ptgt, M_PMC); 933 934 /* if the PMC now lacks targets, send the owner a SIGIO */ 935 if (LIST_EMPTY(&pm->pm_targets)) { 936 p = pm->pm_owner->po_owner; 937 PROC_LOCK(p); 938 kern_psignal(p, SIGIO); 939 PROC_UNLOCK(p); 940 941 PMCDBG2(PRC,SIG,2, "signalling proc=%p signal=%d", p, 942 SIGIO); 943 } 944 } 945 946 /* 947 * Check if PMC 'pm' may be attached to target process 't'. 
948 */ 949 950 static int 951 pmc_can_attach(struct pmc *pm, struct proc *t) 952 { 953 struct proc *o; /* pmc owner */ 954 struct ucred *oc, *tc; /* owner, target credentials */ 955 int decline_attach, i; 956 957 /* 958 * A PMC's owner can always attach that PMC to itself. 959 */ 960 961 if ((o = pm->pm_owner->po_owner) == t) 962 return 0; 963 964 PROC_LOCK(o); 965 oc = o->p_ucred; 966 crhold(oc); 967 PROC_UNLOCK(o); 968 969 PROC_LOCK(t); 970 tc = t->p_ucred; 971 crhold(tc); 972 PROC_UNLOCK(t); 973 974 /* 975 * The effective uid of the PMC owner should match at least one 976 * of the {effective,real,saved} uids of the target process. 977 */ 978 979 decline_attach = oc->cr_uid != tc->cr_uid && 980 oc->cr_uid != tc->cr_svuid && 981 oc->cr_uid != tc->cr_ruid; 982 983 /* 984 * Every one of the target's group ids, must be in the owner's 985 * group list. 986 */ 987 for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) 988 decline_attach = !groupmember(tc->cr_groups[i], oc); 989 990 /* check the read and saved gids too */ 991 if (decline_attach == 0) 992 decline_attach = !groupmember(tc->cr_rgid, oc) || 993 !groupmember(tc->cr_svgid, oc); 994 995 crfree(tc); 996 crfree(oc); 997 998 return !decline_attach; 999 } 1000 1001 /* 1002 * Attach a process to a PMC. 1003 */ 1004 1005 static int 1006 pmc_attach_one_process(struct proc *p, struct pmc *pm) 1007 { 1008 int ri; 1009 char *fullpath, *freepath; 1010 struct pmc_process *pp; 1011 1012 sx_assert(&pmc_sx, SX_XLOCKED); 1013 1014 PMCDBG5(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, 1015 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); 1016 1017 /* 1018 * Locate the process descriptor corresponding to process 'p', 1019 * allocating space as needed. 1020 * 1021 * Verify that rowindex 'pm_rowindex' is free in the process 1022 * descriptor. 1023 * 1024 * If not, allocate space for a descriptor and link the 1025 * process descriptor and PMC. 
1026 */ 1027 ri = PMC_TO_ROWINDEX(pm); 1028 1029 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) 1030 return ENOMEM; 1031 1032 if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */ 1033 return EEXIST; 1034 1035 if (pp->pp_pmcs[ri].pp_pmc != NULL) 1036 return EBUSY; 1037 1038 pmc_link_target_process(pm, pp); 1039 1040 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && 1041 (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) 1042 pm->pm_flags |= PMC_F_NEEDS_LOGFILE; 1043 1044 pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ 1045 1046 /* issue an attach event to a configured log file */ 1047 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { 1048 if (p->p_flag & P_KPROC) { 1049 fullpath = kernelname; 1050 freepath = NULL; 1051 } else { 1052 pmc_getfilename(p->p_textvp, &fullpath, &freepath); 1053 pmclog_process_pmcattach(pm, p->p_pid, fullpath); 1054 } 1055 free(freepath, M_TEMP); 1056 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 1057 pmc_log_process_mappings(pm->pm_owner, p); 1058 } 1059 /* mark process as using HWPMCs */ 1060 PROC_LOCK(p); 1061 p->p_flag |= P_HWPMC; 1062 PROC_UNLOCK(p); 1063 1064 return 0; 1065 } 1066 1067 /* 1068 * Attach a process and optionally its children 1069 */ 1070 1071 static int 1072 pmc_attach_process(struct proc *p, struct pmc *pm) 1073 { 1074 int error; 1075 struct proc *top; 1076 1077 sx_assert(&pmc_sx, SX_XLOCKED); 1078 1079 PMCDBG5(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, 1080 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); 1081 1082 1083 /* 1084 * If this PMC successfully allowed a GETMSR operation 1085 * in the past, disallow further ATTACHes. 1086 */ 1087 1088 if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) 1089 return EPERM; 1090 1091 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) 1092 return pmc_attach_one_process(p, pm); 1093 1094 /* 1095 * Traverse all child processes, attaching them to 1096 * this PMC. 
1097 */ 1098 1099 sx_slock(&proctree_lock); 1100 1101 top = p; 1102 1103 for (;;) { 1104 if ((error = pmc_attach_one_process(p, pm)) != 0) 1105 break; 1106 if (!LIST_EMPTY(&p->p_children)) 1107 p = LIST_FIRST(&p->p_children); 1108 else for (;;) { 1109 if (p == top) 1110 goto done; 1111 if (LIST_NEXT(p, p_sibling)) { 1112 p = LIST_NEXT(p, p_sibling); 1113 break; 1114 } 1115 p = p->p_pptr; 1116 } 1117 } 1118 1119 if (error) 1120 (void) pmc_detach_process(top, pm); 1121 1122 done: 1123 sx_sunlock(&proctree_lock); 1124 return error; 1125 } 1126 1127 /* 1128 * Detach a process from a PMC. If there are no other PMCs tracking 1129 * this process, remove the process structure from its hash table. If 1130 * 'flags' contains PMC_FLAG_REMOVE, then free the process structure. 1131 */ 1132 1133 static int 1134 pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) 1135 { 1136 int ri; 1137 struct pmc_process *pp; 1138 1139 sx_assert(&pmc_sx, SX_XLOCKED); 1140 1141 KASSERT(pm != NULL, 1142 ("[pmc,%d] null pm pointer", __LINE__)); 1143 1144 ri = PMC_TO_ROWINDEX(pm); 1145 1146 PMCDBG6(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", 1147 pm, ri, p, p->p_pid, p->p_comm, flags); 1148 1149 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) 1150 return ESRCH; 1151 1152 if (pp->pp_pmcs[ri].pp_pmc != pm) 1153 return EINVAL; 1154 1155 pmc_unlink_target_process(pm, pp); 1156 1157 /* Issue a detach entry if a log file is configured */ 1158 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) 1159 pmclog_process_pmcdetach(pm, p->p_pid); 1160 1161 /* 1162 * If there are no PMCs targeting this process, we remove its 1163 * descriptor from the target hash table and unset the P_HWPMC 1164 * flag in the struct proc. 
1165 */ 1166 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc, 1167 ("[pmc,%d] Illegal refcnt %d for process struct %p", 1168 __LINE__, pp->pp_refcnt, pp)); 1169 1170 if (pp->pp_refcnt != 0) /* still a target of some PMC */ 1171 return 0; 1172 1173 pmc_remove_process_descriptor(pp); 1174 1175 if (flags & PMC_FLAG_REMOVE) 1176 free(pp, M_PMC); 1177 1178 PROC_LOCK(p); 1179 p->p_flag &= ~P_HWPMC; 1180 PROC_UNLOCK(p); 1181 1182 return 0; 1183 } 1184 1185 /* 1186 * Detach a process and optionally its descendants from a PMC. 1187 */ 1188 1189 static int 1190 pmc_detach_process(struct proc *p, struct pmc *pm) 1191 { 1192 struct proc *top; 1193 1194 sx_assert(&pmc_sx, SX_XLOCKED); 1195 1196 PMCDBG5(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, 1197 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); 1198 1199 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) 1200 return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); 1201 1202 /* 1203 * Traverse all children, detaching them from this PMC. We 1204 * ignore errors since we could be detaching a PMC from a 1205 * partially attached proc tree. 
1206 */ 1207 1208 sx_slock(&proctree_lock); 1209 1210 top = p; 1211 1212 for (;;) { 1213 (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); 1214 1215 if (!LIST_EMPTY(&p->p_children)) 1216 p = LIST_FIRST(&p->p_children); 1217 else for (;;) { 1218 if (p == top) 1219 goto done; 1220 if (LIST_NEXT(p, p_sibling)) { 1221 p = LIST_NEXT(p, p_sibling); 1222 break; 1223 } 1224 p = p->p_pptr; 1225 } 1226 } 1227 1228 done: 1229 sx_sunlock(&proctree_lock); 1230 1231 if (LIST_EMPTY(&pm->pm_targets)) 1232 pm->pm_flags &= ~PMC_F_ATTACH_DONE; 1233 1234 return 0; 1235 } 1236 1237 1238 /* 1239 * Thread context switch IN 1240 */ 1241 1242 static void 1243 pmc_process_csw_in(struct thread *td) 1244 { 1245 int cpu; 1246 unsigned int adjri, ri; 1247 struct pmc *pm; 1248 struct proc *p; 1249 struct pmc_cpu *pc; 1250 struct pmc_hw *phw; 1251 pmc_value_t newvalue; 1252 struct pmc_process *pp; 1253 struct pmc_classdep *pcd; 1254 1255 p = td->td_proc; 1256 1257 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) 1258 return; 1259 1260 KASSERT(pp->pp_proc == td->td_proc, 1261 ("[pmc,%d] not my thread state", __LINE__)); 1262 1263 critical_enter(); /* no preemption from this point */ 1264 1265 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ 1266 1267 PMCDBG5(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, 1268 p->p_pid, p->p_comm, pp); 1269 1270 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 1271 ("[pmc,%d] weird CPU id %d", __LINE__, cpu)); 1272 1273 pc = pmc_pcpu[cpu]; 1274 1275 for (ri = 0; ri < md->pmd_npmc; ri++) { 1276 1277 if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) 1278 continue; 1279 1280 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), 1281 ("[pmc,%d] Target PMC in non-virtual mode (%d)", 1282 __LINE__, PMC_TO_MODE(pm))); 1283 1284 KASSERT(PMC_TO_ROWINDEX(pm) == ri, 1285 ("[pmc,%d] Row index mismatch pmc %d != ri %d", 1286 __LINE__, PMC_TO_ROWINDEX(pm), ri)); 1287 1288 /* 1289 * Only PMCs that are marked as 'RUNNING' need 1290 * be placed on hardware. 
1291 */ 1292 1293 if (pm->pm_state != PMC_STATE_RUNNING) 1294 continue; 1295 1296 /* increment PMC runcount */ 1297 counter_u64_add(pm->pm_runcount, 1); 1298 1299 /* configure the HWPMC we are going to use. */ 1300 pcd = pmc_ri_to_classdep(md, ri, &adjri); 1301 pcd->pcd_config_pmc(cpu, adjri, pm); 1302 1303 phw = pc->pc_hwpmcs[ri]; 1304 1305 KASSERT(phw != NULL, 1306 ("[pmc,%d] null hw pointer", __LINE__)); 1307 1308 KASSERT(phw->phw_pmc == pm, 1309 ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, 1310 phw->phw_pmc, pm)); 1311 1312 /* 1313 * Write out saved value and start the PMC. 1314 * 1315 * Sampling PMCs use a per-process value, while 1316 * counting mode PMCs use a per-pmc value that is 1317 * inherited across descendants. 1318 */ 1319 if (PMC_TO_MODE(pm) == PMC_MODE_TS) { 1320 mtx_pool_lock_spin(pmc_mtxpool, pm); 1321 1322 /* 1323 * Use the saved value calculated after the most recent 1324 * thread switch out to start this counter. Reset 1325 * the saved count in case another thread from this 1326 * process switches in before any threads switch out. 1327 */ 1328 newvalue = PMC_PCPU_SAVED(cpu,ri) = 1329 pp->pp_pmcs[ri].pp_pmcval; 1330 pp->pp_pmcs[ri].pp_pmcval = pm->pm_sc.pm_reloadcount; 1331 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1332 } else { 1333 KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, 1334 ("[pmc,%d] illegal mode=%d", __LINE__, 1335 PMC_TO_MODE(pm))); 1336 mtx_pool_lock_spin(pmc_mtxpool, pm); 1337 newvalue = PMC_PCPU_SAVED(cpu, ri) = 1338 pm->pm_gv.pm_savedvalue; 1339 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1340 } 1341 1342 PMCDBG3(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); 1343 1344 pcd->pcd_write_pmc(cpu, adjri, newvalue); 1345 1346 /* If a sampling mode PMC, reset stalled state. */ 1347 if (PMC_TO_MODE(pm) == PMC_MODE_TS) 1348 pm->pm_pcpu_state[cpu].pps_stalled = 0; 1349 1350 /* Indicate that we desire this to run. */ 1351 pm->pm_pcpu_state[cpu].pps_cpustate = 1; 1352 1353 /* Start the PMC. 
*/ 1354 pcd->pcd_start_pmc(cpu, adjri); 1355 } 1356 1357 /* 1358 * perform any other architecture/cpu dependent thread 1359 * switch-in actions. 1360 */ 1361 1362 (void) (*md->pmd_switch_in)(pc, pp); 1363 1364 critical_exit(); 1365 1366 } 1367 1368 /* 1369 * Thread context switch OUT. 1370 */ 1371 1372 static void 1373 pmc_process_csw_out(struct thread *td) 1374 { 1375 int cpu; 1376 int64_t tmp; 1377 struct pmc *pm; 1378 struct proc *p; 1379 enum pmc_mode mode; 1380 struct pmc_cpu *pc; 1381 pmc_value_t newvalue; 1382 unsigned int adjri, ri; 1383 struct pmc_process *pp; 1384 struct pmc_classdep *pcd; 1385 1386 1387 /* 1388 * Locate our process descriptor; this may be NULL if 1389 * this process is exiting and we have already removed 1390 * the process from the target process table. 1391 * 1392 * Note that due to kernel preemption, multiple 1393 * context switches may happen while the process is 1394 * exiting. 1395 * 1396 * Note also that if the target process cannot be 1397 * found we still need to deconfigure any PMCs that 1398 * are currently running on hardware. 1399 */ 1400 1401 p = td->td_proc; 1402 pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); 1403 1404 /* 1405 * save PMCs 1406 */ 1407 1408 critical_enter(); 1409 1410 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ 1411 1412 PMCDBG5(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, 1413 p->p_pid, p->p_comm, pp); 1414 1415 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 1416 ("[pmc,%d weird CPU id %d", __LINE__, cpu)); 1417 1418 pc = pmc_pcpu[cpu]; 1419 1420 /* 1421 * When a PMC gets unlinked from a target PMC, it will 1422 * be removed from the target's pp_pmc[] array. 1423 * 1424 * However, on a MP system, the target could have been 1425 * executing on another CPU at the time of the unlink. 1426 * So, at context switch OUT time, we need to look at 1427 * the hardware to determine if a PMC is scheduled on 1428 * it. 
1429 */ 1430 1431 for (ri = 0; ri < md->pmd_npmc; ri++) { 1432 1433 pcd = pmc_ri_to_classdep(md, ri, &adjri); 1434 pm = NULL; 1435 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); 1436 1437 if (pm == NULL) /* nothing at this row index */ 1438 continue; 1439 1440 mode = PMC_TO_MODE(pm); 1441 if (!PMC_IS_VIRTUAL_MODE(mode)) 1442 continue; /* not a process virtual PMC */ 1443 1444 KASSERT(PMC_TO_ROWINDEX(pm) == ri, 1445 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", 1446 __LINE__, PMC_TO_ROWINDEX(pm), ri)); 1447 1448 /* 1449 * Change desired state, and then stop if not stalled. 1450 * This two-step dance should avoid race conditions where 1451 * an interrupt re-enables the PMC after this code has 1452 * already checked the pm_stalled flag. 1453 */ 1454 pm->pm_pcpu_state[cpu].pps_cpustate = 0; 1455 if (pm->pm_pcpu_state[cpu].pps_stalled == 0) 1456 pcd->pcd_stop_pmc(cpu, adjri); 1457 1458 /* reduce this PMC's runcount */ 1459 counter_u64_add(pm->pm_runcount, -1); 1460 1461 /* 1462 * If this PMC is associated with this process, 1463 * save the reading. 1464 */ 1465 1466 if (pm->pm_state != PMC_STATE_DELETED && pp != NULL && 1467 pp->pp_pmcs[ri].pp_pmc != NULL) { 1468 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, 1469 ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, 1470 pm, ri, pp->pp_pmcs[ri].pp_pmc)); 1471 1472 KASSERT(pp->pp_refcnt > 0, 1473 ("[pmc,%d] pp refcnt = %d", __LINE__, 1474 pp->pp_refcnt)); 1475 1476 pcd->pcd_read_pmc(cpu, adjri, &newvalue); 1477 1478 if (mode == PMC_MODE_TS) { 1479 PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (samp)", 1480 cpu, ri, PMC_PCPU_SAVED(cpu,ri) - newvalue); 1481 1482 /* 1483 * For sampling process-virtual PMCs, 1484 * newvalue is the number of events to be seen 1485 * until the next sampling interrupt. 1486 * We can just add the events left from this 1487 * invocation to the counter, then adjust 1488 * in case we overflow our range. 1489 * 1490 * (Recall that we reload the counter every 1491 * time we use it.) 
1492 */ 1493 mtx_pool_lock_spin(pmc_mtxpool, pm); 1494 1495 pp->pp_pmcs[ri].pp_pmcval += newvalue; 1496 if (pp->pp_pmcs[ri].pp_pmcval > 1497 pm->pm_sc.pm_reloadcount) 1498 pp->pp_pmcs[ri].pp_pmcval -= 1499 pm->pm_sc.pm_reloadcount; 1500 KASSERT(pp->pp_pmcs[ri].pp_pmcval > 0 && 1501 pp->pp_pmcs[ri].pp_pmcval <= 1502 pm->pm_sc.pm_reloadcount, 1503 ("[pmc,%d] pp_pmcval outside of expected " 1504 "range cpu=%d ri=%d pp_pmcval=%jx " 1505 "pm_reloadcount=%jx", __LINE__, cpu, ri, 1506 pp->pp_pmcs[ri].pp_pmcval, 1507 pm->pm_sc.pm_reloadcount)); 1508 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1509 1510 } else { 1511 tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); 1512 1513 PMCDBG3(CSW,SWO,1,"cpu=%d ri=%d tmp=%jd (count)", 1514 cpu, ri, tmp); 1515 1516 /* 1517 * For counting process-virtual PMCs, 1518 * we expect the count to be 1519 * increasing monotonically, modulo a 64 1520 * bit wraparound. 1521 */ 1522 KASSERT(tmp >= 0, 1523 ("[pmc,%d] negative increment cpu=%d " 1524 "ri=%d newvalue=%jx saved=%jx " 1525 "incr=%jx", __LINE__, cpu, ri, 1526 newvalue, PMC_PCPU_SAVED(cpu,ri), tmp)); 1527 1528 mtx_pool_lock_spin(pmc_mtxpool, pm); 1529 pm->pm_gv.pm_savedvalue += tmp; 1530 pp->pp_pmcs[ri].pp_pmcval += tmp; 1531 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1532 1533 if (pm->pm_flags & PMC_F_LOG_PROCCSW) 1534 pmclog_process_proccsw(pm, pp, tmp); 1535 } 1536 } 1537 1538 /* mark hardware as free */ 1539 pcd->pcd_config_pmc(cpu, adjri, NULL); 1540 } 1541 1542 /* 1543 * perform any other architecture/cpu dependent thread 1544 * switch out functions. 1545 */ 1546 1547 (void) (*md->pmd_switch_out)(pc, pp); 1548 1549 critical_exit(); 1550 } 1551 1552 /* 1553 * A mapping change for a process. 
1554 */ 1555 1556 static void 1557 pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) 1558 { 1559 int ri; 1560 pid_t pid; 1561 char *fullpath, *freepath; 1562 const struct pmc *pm; 1563 struct pmc_owner *po; 1564 const struct pmc_process *pp; 1565 1566 freepath = fullpath = NULL; 1567 epoch_exit(global_epoch); 1568 pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); 1569 1570 pid = td->td_proc->p_pid; 1571 1572 epoch_enter(global_epoch); 1573 /* Inform owners of all system-wide sampling PMCs. */ 1574 CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 1575 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 1576 pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); 1577 1578 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) 1579 goto done; 1580 1581 /* 1582 * Inform sampling PMC owners tracking this process. 1583 */ 1584 for (ri = 0; ri < md->pmd_npmc; ri++) 1585 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && 1586 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 1587 pmclog_process_map_in(pm->pm_owner, 1588 pid, pkm->pm_address, fullpath); 1589 1590 done: 1591 if (freepath) 1592 free(freepath, M_TEMP); 1593 } 1594 1595 1596 /* 1597 * Log an munmap request. 
1598 */ 1599 1600 static void 1601 pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) 1602 { 1603 int ri; 1604 pid_t pid; 1605 struct pmc_owner *po; 1606 const struct pmc *pm; 1607 const struct pmc_process *pp; 1608 1609 pid = td->td_proc->p_pid; 1610 1611 epoch_enter(global_epoch); 1612 CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 1613 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 1614 pmclog_process_map_out(po, pid, pkm->pm_address, 1615 pkm->pm_address + pkm->pm_size); 1616 epoch_exit(global_epoch); 1617 1618 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) 1619 return; 1620 1621 for (ri = 0; ri < md->pmd_npmc; ri++) 1622 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && 1623 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 1624 pmclog_process_map_out(pm->pm_owner, pid, 1625 pkm->pm_address, pkm->pm_address + pkm->pm_size); 1626 } 1627 1628 /* 1629 * Log mapping information about the kernel. 1630 */ 1631 1632 static void 1633 pmc_log_kernel_mappings(struct pmc *pm) 1634 { 1635 struct pmc_owner *po; 1636 struct pmckern_map_in *km, *kmbase; 1637 1638 MPASS(in_epoch() || sx_xlocked(&pmc_sx)); 1639 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), 1640 ("[pmc,%d] non-sampling PMC (%p) desires mapping information", 1641 __LINE__, (void *) pm)); 1642 1643 po = pm->pm_owner; 1644 1645 if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) 1646 return; 1647 1648 /* 1649 * Log the current set of kernel modules. 1650 */ 1651 kmbase = linker_hwpmc_list_objects(); 1652 for (km = kmbase; km->pm_file != NULL; km++) { 1653 PMCDBG2(LOG,REG,1,"%s %p", (char *) km->pm_file, 1654 (void *) km->pm_address); 1655 pmclog_process_map_in(po, (pid_t) -1, km->pm_address, 1656 km->pm_file); 1657 } 1658 free(kmbase, M_LINKER); 1659 1660 po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; 1661 } 1662 1663 /* 1664 * Log the mappings for a single process. 
*/

/*
 * Walks the vm_map of process 'p' and emits one MAP-IN record to owner
 * 'po' for each executable, vnode-backed map entry, skipping entries
 * that merely continue the previous entry's vnode.  Returns silently
 * if the process has no vmspace to reference.
 */

static void
pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
{
	vm_map_t map;
	struct vnode *vp;
	struct vmspace *vm;
	vm_map_entry_t entry;
	vm_offset_t last_end;
	u_int last_timestamp;
	struct vnode *last_vp;
	vm_offset_t start_addr;
	vm_object_t obj, lobj, tobj;
	char *fullpath, *freepath;

	last_vp = NULL;
	last_end = (vm_offset_t) 0;
	fullpath = freepath = NULL;

	if ((vm = vmspace_acquire_ref(p)) == NULL)
		return;

	map = &vm->vm_map;
	vm_map_lock_read(map);

	for (entry = map->header.next; entry != &map->header; entry = entry->next) {

		if (entry == NULL) {
			PMCDBG2(LOG,OPS,2, "hwpmc: vm_map entry unexpectedly "
			    "NULL! pid=%d vm_map=%p\n", p->p_pid, map);
			break;
		}

		/*
		 * We only care about executable map entries.
		 */
		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
		    !(entry->protection & VM_PROT_EXECUTE) ||
		    (entry->object.vm_object == NULL)) {
			continue;
		}

		obj = entry->object.vm_object;
		VM_OBJECT_RLOCK(obj);

		/*
		 * Walk the backing_object list to find the base
		 * (non-shadowed) vm_object.  'obj' stays read-locked
		 * throughout; intermediate objects are unlocked as the
		 * walk moves past them.
		 */
		for (lobj = tobj = obj; tobj != NULL; tobj = tobj->backing_object) {
			if (tobj != obj)
				VM_OBJECT_RLOCK(tobj);
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);
			lobj = tobj;
		}

		/*
		 * At this point lobj is the base vm_object and it is locked.
		 */
		if (lobj == NULL) {
			PMCDBG3(LOG,OPS,2, "hwpmc: lobj unexpectedly NULL! pid=%d "
			    "vm_map=%p vm_obj=%p\n", p->p_pid, map, obj);
			VM_OBJECT_RUNLOCK(obj);
			continue;
		}

		/* Entries not backed by a vnode are not logged. */
		vp = vm_object_vnode(lobj);
		if (vp == NULL) {
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);
			VM_OBJECT_RUNLOCK(obj);
			continue;
		}

		/*
		 * Skip contiguous regions that point to the same
		 * vnode, so we don't emit redundant MAP-IN
		 * directives.
		 */
		if (entry->start == last_end && vp == last_vp) {
			last_end = entry->end;
			if (lobj != obj)
				VM_OBJECT_RUNLOCK(lobj);
			VM_OBJECT_RUNLOCK(obj);
			continue;
		}

		/*
		 * We don't want to keep the proc's vm_map or this
		 * vm_object locked while we walk the pathname, since
		 * vn_fullpath() can sleep.  However, if we drop the
		 * lock, it's possible for concurrent activity to
		 * modify the vm_map list.  To protect against this,
		 * we save the vm_map timestamp before we release the
		 * lock, and check it after we reacquire the lock
		 * below.
		 */
		start_addr = entry->start;
		last_end = entry->end;
		last_timestamp = map->timestamp;
		vm_map_unlock_read(map);

		/* Hold the vnode across the object unlocks and the lookup. */
		vref(vp);
		if (lobj != obj)
			VM_OBJECT_RUNLOCK(lobj);

		VM_OBJECT_RUNLOCK(obj);

		freepath = NULL;
		pmc_getfilename(vp, &fullpath, &freepath);
		last_vp = vp;

		vrele(vp);

		vp = NULL;
		pmclog_process_map_in(po, p->p_pid, start_addr, fullpath);
		if (freepath)
			free(freepath, M_TEMP);

		vm_map_lock_read(map);

		/*
		 * If our saved timestamp doesn't match, this means
		 * that the vm_map was modified out from under us and
		 * we can't trust our current "entry" pointer.  Do a
		 * new lookup for this entry.  If there is no entry
		 * for this address range, vm_map_lookup_entry() will
		 * return the previous one, so we always want to go to
		 * entry->next on the next loop iteration.
		 *
		 * There is an edge condition here that can occur if
		 * there is no entry at or before this address.  In
		 * this situation, vm_map_lookup_entry returns
		 * &map->header, which would cause our loop to abort
		 * without processing the rest of the map.  However,
		 * in practice this will never happen for process
		 * vm_map.  This is because the executable's text
		 * segment is the first mapping in the proc's address
		 * space, and this mapping is never removed until the
		 * process exits, so there will always be a non-header
		 * entry at or before the requested address for
		 * vm_map_lookup_entry to return.
		 */
		if (map->timestamp != last_timestamp)
			vm_map_lookup_entry(map, last_end - 1, &entry);
	}

	vm_map_unlock_read(map);
	vmspace_free(vm);
	return;
}

/*
 * Log mappings for all processes in the system.
 *
 * Starts at pid 1 and walks the whole process tree depth-first under a
 * shared 'proctree_lock', logging each process's mappings to 'po'.
 * Panics if pid 1 cannot be found.
 */

static void
pmc_log_all_process_mappings(struct pmc_owner *po)
{
	struct proc *p, *top;

	sx_assert(&pmc_sx, SX_XLOCKED);

	if ((p = pfind(1)) == NULL)
		panic("[pmc,%d] Cannot find init", __LINE__);

	/* pfind() hands the process back locked; only the pointer is needed. */
	PROC_UNLOCK(p);

	sx_slock(&proctree_lock);

	top = p;

	for (;;) {
		pmc_log_process_mappings(po, p);
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			/* Climb until a sibling exists or we are back at the root. */
			if (p == top)
				goto done;
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
 done:
	sx_sunlock(&proctree_lock);
}

/*
 * The 'hook' invoked from the kernel proper
 */


#ifdef	HWPMC_DEBUG
/* Hook names for debug output, indexed by hook function number. */
const char *pmc_hooknames[] = {
	/* these strings correspond to PMC_FN_* in <sys/pmckern.h> */
	"",
	"EXEC",
	"CSW-IN",
	"CSW-OUT",
	"SAMPLE",
	"UNUSED1",
	"UNUSED2",
	"MMAP",
	"MUNMAP",
	"CALLCHAIN-NMI",
	"CALLCHAIN-SOFT",
	"SOFTSAMPLING"
};
#endif

/*
 * Dispatch a hook call identified by 'function' (one of the PMC_FN_*
 * values) for thread 'td'; 'arg' is interpreted per hook.  Always
 * returns 0.
 */
static int
pmc_hook_handler(struct thread *td, int function, void *arg)
{

	PMCDBG4(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
	    pmc_hooknames[function], arg);

	switch (function)
	{

	/*
	 * Process exec()
	 */

	case PMC_FN_PROCESS_EXEC:
	{
		char *fullpath, *freepath;
		unsigned int ri;
		int is_using_hwpmcs;
		struct pmc *pm;
		struct proc *p;
		struct pmc_owner *po;
		struct pmc_process *pp;
		struct pmckern_procexec *pk;

		sx_assert(&pmc_sx, SX_XLOCKED);

		p = td->td_proc;
		pmc_getfilename(p->p_textvp, &fullpath, &freepath);

		pk = (struct pmckern_procexec *) arg;

		epoch_enter(global_epoch);
		/* Inform owners of SS mode PMCs of the exec event. */
		CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
		    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
			    pmclog_process_procexec(po, PMC_ID_INVALID,
				p->p_pid, pk->pm_entryaddr, fullpath);
		epoch_exit(global_epoch);

		PROC_LOCK(p);
		is_using_hwpmcs = p->p_flag & P_HWPMC;
		PROC_UNLOCK(p);

		/* Processes not using PMCs need no further handling. */
		if (!is_using_hwpmcs) {
			if (freepath)
				free(freepath, M_TEMP);
			break;
		}

		/*
		 * PMCs are not inherited across an exec():  remove any
		 * PMCs that this process is the owner of.
		 */

		if ((po = pmc_find_owner_descriptor(p)) != NULL) {
			pmc_remove_owner(po);
			pmc_destroy_owner_descriptor(po);
		}

		/*
		 * If the process being exec'ed is not the target of any
		 * PMC, we are done.
		 */
		if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) {
			if (freepath)
				free(freepath, M_TEMP);
			break;
		}

		/*
		 * Log the exec event to all monitoring owners.  Skip
		 * owners who have already received the event because
		 * they had system sampling PMCs active.
		 */
		for (ri = 0; ri < md->pmd_npmc; ri++)
			if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
				po = pm->pm_owner;
				if (po->po_sscount == 0 &&
				    po->po_flags & PMC_PO_OWNS_LOGFILE)
					pmclog_process_procexec(po, pm->pm_id,
					    p->p_pid, pk->pm_entryaddr,
					    fullpath);
			}

		if (freepath)
			free(freepath, M_TEMP);


		PMCDBG4(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d",
		    p, p->p_pid, p->p_comm, pk->pm_credentialschanged);

		if (pk->pm_credentialschanged == 0) /* no change */
			break;

		/*
		 * If the newly exec()'ed process has a different credential
		 * than before, allow it to be the target of a PMC only if
		 * the PMC's owner has sufficient privilege.
		 *
		 * A non-zero result from pmc_can_attach() is treated here
		 * as "detach this PMC".
		 */

		for (ri = 0; ri < md->pmd_npmc; ri++)
			if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL)
				if (pmc_can_attach(pm, td->td_proc) != 0)
					pmc_detach_one_process(td->td_proc,
					    pm, PMC_FLAG_NONE);

		KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt <= (int) md->pmd_npmc,
		    ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__,
			pp->pp_refcnt, pp));

		/*
		 * If this process is no longer the target of any
		 * PMCs, we can remove the process entry and free
		 * up space.
		 *
		 * NOTE(review): pmc_detach_one_process() already calls
		 * pmc_remove_process_descriptor() when the reference
		 * count reaches zero, so the removal below looks like a
		 * second LIST_REMOVE of the same entry -- confirm.
		 */

		if (pp->pp_refcnt == 0) {
			pmc_remove_process_descriptor(pp);
			free(pp, M_PMC);
			break;
		}

	}
	break;

	case PMC_FN_CSW_IN:
		pmc_process_csw_in(td);
		break;

	case PMC_FN_CSW_OUT:
		pmc_process_csw_out(td);
		break;

	/*
	 * Process accumulated PC samples.
	 *
	 * This function is expected to be called by hardclock() for
	 * each CPU that has accumulated PC samples.
	 *
	 * This function is to be executed on the CPU whose samples
	 * are being processed.
	 */
	case PMC_FN_DO_SAMPLES:

		/*
		 * Clear this CPU's 'pmc_sampled' flag before doing the
		 * rest of the processing.  If the NMI handler gets
		 * invoked after the flag is cleared below but before
		 * "pmc_process_samples()" gets around to processing
		 * the interrupt, then we will come back here at the
		 * next hardclock() tick (and may find nothing to do if
		 * "pmc_process_samples()" had already processed the
		 * interrupt).  We don't lose the interrupt sample.
		 */
		DPCPU_SET(pmc_sampled, 0);
		pmc_process_samples(PCPU_GET(cpuid), PMC_HR);
		pmc_process_samples(PCPU_GET(cpuid), PMC_SR);
		break;

	case PMC_FN_MMAP:
		MPASS(in_epoch() || sx_xlocked(&pmc_sx));
		pmc_process_mmap(td, (struct pmckern_map_in *) arg);
		break;

	case PMC_FN_MUNMAP:
		MPASS(in_epoch() || sx_xlocked(&pmc_sx));
		pmc_process_munmap(td, (struct pmckern_map_out *) arg);
		break;

	case PMC_FN_USER_CALLCHAIN:
		/*
		 * Record a call chain into the PMC_HR sample ring.
		 */
		KASSERT(td == curthread, ("[pmc,%d] td != curthread",
		    __LINE__));

		pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR,
		    (struct trapframe *) arg);
		td->td_pflags &= ~TDP_CALLCHAIN;
		break;

	case PMC_FN_USER_CALLCHAIN_SOFT:
		/*
		 * Record a call chain into the PMC_SR sample ring.
		 */
		KASSERT(td == curthread, ("[pmc,%d] td != curthread",
		    __LINE__));
		pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_SR,
		    (struct trapframe *) arg);
		td->td_pflags &= ~TDP_CALLCHAIN;
		break;

	case PMC_FN_SOFT_SAMPLING:
		/*
		 * Call soft PMC sampling intr.
		 */
		pmc_soft_intr((struct pmckern_soft *) arg);
		break;

	default:
#ifdef	HWPMC_DEBUG
		KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
#endif
		break;

	}

	return 0;
}

/*
 * allocate a 'struct pmc_owner' descriptor in the owner hash table.
2094 */ 2095 2096 static struct pmc_owner * 2097 pmc_allocate_owner_descriptor(struct proc *p) 2098 { 2099 uint32_t hindex; 2100 struct pmc_owner *po; 2101 struct pmc_ownerhash *poh; 2102 2103 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); 2104 poh = &pmc_ownerhash[hindex]; 2105 2106 /* allocate space for N pointers and one descriptor struct */ 2107 po = malloc(sizeof(struct pmc_owner), M_PMC, M_WAITOK|M_ZERO); 2108 po->po_owner = p; 2109 LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ 2110 2111 TAILQ_INIT(&po->po_logbuffers); 2112 mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); 2113 2114 PMCDBG4(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", 2115 p, p->p_pid, p->p_comm, po); 2116 2117 return po; 2118 } 2119 2120 static void 2121 pmc_destroy_owner_descriptor(struct pmc_owner *po) 2122 { 2123 2124 PMCDBG4(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", 2125 po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); 2126 2127 mtx_destroy(&po->po_mtx); 2128 free(po, M_PMC); 2129 } 2130 2131 /* 2132 * find the descriptor corresponding to process 'p', adding or removing it 2133 * as specified by 'mode'. 2134 */ 2135 2136 static struct pmc_process * 2137 pmc_find_process_descriptor(struct proc *p, uint32_t mode) 2138 { 2139 uint32_t hindex; 2140 struct pmc_process *pp, *ppnew; 2141 struct pmc_processhash *pph; 2142 2143 hindex = PMC_HASH_PTR(p, pmc_processhashmask); 2144 pph = &pmc_processhash[hindex]; 2145 2146 ppnew = NULL; 2147 2148 /* 2149 * Pre-allocate memory in the FIND_ALLOCATE case since we 2150 * cannot call malloc(9) once we hold a spin lock. 
2151 */ 2152 if (mode & PMC_FLAG_ALLOCATE) 2153 ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc * 2154 sizeof(struct pmc_targetstate), M_PMC, M_WAITOK|M_ZERO); 2155 2156 mtx_lock_spin(&pmc_processhash_mtx); 2157 LIST_FOREACH(pp, pph, pp_next) 2158 if (pp->pp_proc == p) 2159 break; 2160 2161 if ((mode & PMC_FLAG_REMOVE) && pp != NULL) 2162 LIST_REMOVE(pp, pp_next); 2163 2164 if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && 2165 ppnew != NULL) { 2166 ppnew->pp_proc = p; 2167 LIST_INSERT_HEAD(pph, ppnew, pp_next); 2168 pp = ppnew; 2169 ppnew = NULL; 2170 } 2171 mtx_unlock_spin(&pmc_processhash_mtx); 2172 2173 if (pp != NULL && ppnew != NULL) 2174 free(ppnew, M_PMC); 2175 2176 return pp; 2177 } 2178 2179 /* 2180 * remove a process descriptor from the process hash table. 2181 */ 2182 2183 static void 2184 pmc_remove_process_descriptor(struct pmc_process *pp) 2185 { 2186 KASSERT(pp->pp_refcnt == 0, 2187 ("[pmc,%d] Removing process descriptor %p with count %d", 2188 __LINE__, pp, pp->pp_refcnt)); 2189 2190 mtx_lock_spin(&pmc_processhash_mtx); 2191 LIST_REMOVE(pp, pp_next); 2192 mtx_unlock_spin(&pmc_processhash_mtx); 2193 } 2194 2195 2196 /* 2197 * find an owner descriptor corresponding to proc 'p' 2198 */ 2199 2200 static struct pmc_owner * 2201 pmc_find_owner_descriptor(struct proc *p) 2202 { 2203 uint32_t hindex; 2204 struct pmc_owner *po; 2205 struct pmc_ownerhash *poh; 2206 2207 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); 2208 poh = &pmc_ownerhash[hindex]; 2209 2210 po = NULL; 2211 LIST_FOREACH(po, poh, po_next) 2212 if (po->po_owner == p) 2213 break; 2214 2215 PMCDBG5(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " 2216 "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); 2217 2218 return po; 2219 } 2220 2221 /* 2222 * pmc_allocate_pmc_descriptor 2223 * 2224 * Allocate a pmc descriptor and initialize its 2225 * fields. 
2226 */ 2227 2228 static struct pmc * 2229 pmc_allocate_pmc_descriptor(void) 2230 { 2231 struct pmc *pmc; 2232 2233 pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO); 2234 pmc->pm_runcount = counter_u64_alloc(M_WAITOK); 2235 pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state)*mp_ncpus, M_PMC, M_WAITOK|M_ZERO); 2236 PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); 2237 2238 return pmc; 2239 } 2240 2241 /* 2242 * Destroy a pmc descriptor. 2243 */ 2244 2245 static void 2246 pmc_destroy_pmc_descriptor(struct pmc *pm) 2247 { 2248 2249 KASSERT(pm->pm_state == PMC_STATE_DELETED || 2250 pm->pm_state == PMC_STATE_FREE, 2251 ("[pmc,%d] destroying non-deleted PMC", __LINE__)); 2252 KASSERT(LIST_EMPTY(&pm->pm_targets), 2253 ("[pmc,%d] destroying pmc with targets", __LINE__)); 2254 KASSERT(pm->pm_owner == NULL, 2255 ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); 2256 KASSERT(counter_u64_fetch(pm->pm_runcount) == 0, 2257 ("[pmc,%d] pmc has non-zero run count %ld", __LINE__, 2258 (unsigned long)counter_u64_fetch(pm->pm_runcount))); 2259 2260 counter_u64_free(pm->pm_runcount); 2261 free(pm->pm_pcpu_state, M_PMC); 2262 free(pm, M_PMC); 2263 } 2264 2265 static void 2266 pmc_wait_for_pmc_idle(struct pmc *pm) 2267 { 2268 #ifdef HWPMC_DEBUG 2269 volatile int maxloop; 2270 2271 maxloop = 100 * pmc_cpu_max(); 2272 #endif 2273 /* 2274 * Loop (with a forced context switch) till the PMC's runcount 2275 * comes down to zero. 
2276 */ 2277 while (counter_u64_fetch(pm->pm_runcount) > 0) { 2278 #ifdef HWPMC_DEBUG 2279 maxloop--; 2280 KASSERT(maxloop > 0, 2281 ("[pmc,%d] (ri%d, rc%ld) waiting too long for " 2282 "pmc to be free", __LINE__, 2283 PMC_TO_ROWINDEX(pm), (unsigned long)counter_u64_fetch(pm->pm_runcount))); 2284 #endif 2285 pmc_force_context_switch(); 2286 } 2287 } 2288 2289 /* 2290 * This function does the following things: 2291 * 2292 * - detaches the PMC from hardware 2293 * - unlinks all target threads that were attached to it 2294 * - removes the PMC from its owner's list 2295 * - destroys the PMC private mutex 2296 * 2297 * Once this function completes, the given pmc pointer can be freed by 2298 * calling pmc_destroy_pmc_descriptor(). 2299 */ 2300 2301 static void 2302 pmc_release_pmc_descriptor(struct pmc *pm) 2303 { 2304 enum pmc_mode mode; 2305 struct pmc_hw *phw; 2306 u_int adjri, ri, cpu; 2307 struct pmc_owner *po; 2308 struct pmc_binding pb; 2309 struct pmc_process *pp; 2310 struct pmc_classdep *pcd; 2311 struct pmc_target *ptgt, *tmp; 2312 2313 sx_assert(&pmc_sx, SX_XLOCKED); 2314 2315 KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); 2316 2317 ri = PMC_TO_ROWINDEX(pm); 2318 pcd = pmc_ri_to_classdep(md, ri, &adjri); 2319 mode = PMC_TO_MODE(pm); 2320 2321 PMCDBG3(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, 2322 mode); 2323 2324 /* 2325 * First, we take the PMC off hardware. 2326 */ 2327 cpu = 0; 2328 if (PMC_IS_SYSTEM_MODE(mode)) { 2329 2330 /* 2331 * A system mode PMC runs on a specific CPU. Switch 2332 * to this CPU and turn hardware off. 
2333 */ 2334 pmc_save_cpu_binding(&pb); 2335 2336 cpu = PMC_TO_CPU(pm); 2337 2338 pmc_select_cpu(cpu); 2339 2340 /* switch off non-stalled CPUs */ 2341 pm->pm_pcpu_state[cpu].pps_cpustate = 0; 2342 if (pm->pm_state == PMC_STATE_RUNNING && 2343 pm->pm_pcpu_state[cpu].pps_stalled == 0) { 2344 2345 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; 2346 2347 KASSERT(phw->phw_pmc == pm, 2348 ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", 2349 __LINE__, ri, phw->phw_pmc, pm)); 2350 PMCDBG2(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); 2351 2352 critical_enter(); 2353 pcd->pcd_stop_pmc(cpu, adjri); 2354 critical_exit(); 2355 } 2356 2357 PMCDBG2(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); 2358 2359 critical_enter(); 2360 pcd->pcd_config_pmc(cpu, adjri, NULL); 2361 critical_exit(); 2362 2363 /* adjust the global and process count of SS mode PMCs */ 2364 if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { 2365 po = pm->pm_owner; 2366 po->po_sscount--; 2367 if (po->po_sscount == 0) { 2368 atomic_subtract_rel_int(&pmc_ss_count, 1); 2369 CK_LIST_REMOVE(po, po_ssnext); 2370 epoch_wait(global_epoch); 2371 } 2372 } 2373 2374 pm->pm_state = PMC_STATE_DELETED; 2375 2376 pmc_restore_cpu_binding(&pb); 2377 2378 /* 2379 * We could have references to this PMC structure in 2380 * the per-cpu sample queues. Wait for the queue to 2381 * drain. 2382 */ 2383 pmc_wait_for_pmc_idle(pm); 2384 2385 } else if (PMC_IS_VIRTUAL_MODE(mode)) { 2386 2387 /* 2388 * A virtual PMC could be running on multiple CPUs at 2389 * a given instant. 2390 * 2391 * By marking its state as DELETED, we ensure that 2392 * this PMC is never further scheduled on hardware. 2393 * 2394 * Then we wait till all CPUs are done with this PMC. 2395 */ 2396 pm->pm_state = PMC_STATE_DELETED; 2397 2398 2399 /* Wait for the PMCs runcount to come to zero. */ 2400 pmc_wait_for_pmc_idle(pm); 2401 2402 /* 2403 * At this point the PMC is off all CPUs and cannot be 2404 * freshly scheduled onto a CPU. 
It is now safe to 2405 * unlink all targets from this PMC. If a 2406 * process-record's refcount falls to zero, we remove 2407 * it from the hash table. The module-wide SX lock 2408 * protects us from races. 2409 */ 2410 LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { 2411 pp = ptgt->pt_process; 2412 pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ 2413 2414 PMCDBG1(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); 2415 2416 /* 2417 * If the target process record shows that no 2418 * PMCs are attached to it, reclaim its space. 2419 */ 2420 2421 if (pp->pp_refcnt == 0) { 2422 pmc_remove_process_descriptor(pp); 2423 free(pp, M_PMC); 2424 } 2425 } 2426 2427 cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ 2428 2429 } 2430 2431 /* 2432 * Release any MD resources 2433 */ 2434 (void) pcd->pcd_release_pmc(cpu, adjri, pm); 2435 2436 /* 2437 * Update row disposition 2438 */ 2439 2440 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) 2441 PMC_UNMARK_ROW_STANDALONE(ri); 2442 else 2443 PMC_UNMARK_ROW_THREAD(ri); 2444 2445 /* unlink from the owner's list */ 2446 if (pm->pm_owner) { 2447 LIST_REMOVE(pm, pm_next); 2448 pm->pm_owner = NULL; 2449 } 2450 } 2451 2452 /* 2453 * Register an owner and a pmc. 
2454 */ 2455 2456 static int 2457 pmc_register_owner(struct proc *p, struct pmc *pmc) 2458 { 2459 struct pmc_owner *po; 2460 2461 sx_assert(&pmc_sx, SX_XLOCKED); 2462 2463 if ((po = pmc_find_owner_descriptor(p)) == NULL) 2464 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) 2465 return ENOMEM; 2466 2467 KASSERT(pmc->pm_owner == NULL, 2468 ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); 2469 pmc->pm_owner = po; 2470 2471 LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); 2472 2473 PROC_LOCK(p); 2474 p->p_flag |= P_HWPMC; 2475 PROC_UNLOCK(p); 2476 2477 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 2478 pmclog_process_pmcallocate(pmc); 2479 2480 PMCDBG2(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", 2481 po, pmc); 2482 2483 return 0; 2484 } 2485 2486 /* 2487 * Return the current row disposition: 2488 * == 0 => FREE 2489 * > 0 => PROCESS MODE 2490 * < 0 => SYSTEM MODE 2491 */ 2492 2493 int 2494 pmc_getrowdisp(int ri) 2495 { 2496 return pmc_pmcdisp[ri]; 2497 } 2498 2499 /* 2500 * Check if a PMC at row index 'ri' can be allocated to the current 2501 * process. 2502 * 2503 * Allocation can fail if: 2504 * - the current process is already being profiled by a PMC at index 'ri', 2505 * attached to it via OP_PMCATTACH. 2506 * - the current process has already allocated a PMC at index 'ri' 2507 * via OP_ALLOCATE. 2508 */ 2509 2510 static int 2511 pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) 2512 { 2513 enum pmc_mode mode; 2514 struct pmc *pm; 2515 struct pmc_owner *po; 2516 struct pmc_process *pp; 2517 2518 PMCDBG5(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " 2519 "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); 2520 2521 /* 2522 * We shouldn't have already allocated a process-mode PMC at 2523 * row index 'ri'. 2524 * 2525 * We shouldn't have allocated a system-wide PMC on the same 2526 * CPU and same RI. 
2527 */ 2528 if ((po = pmc_find_owner_descriptor(p)) != NULL) 2529 LIST_FOREACH(pm, &po->po_pmcs, pm_next) { 2530 if (PMC_TO_ROWINDEX(pm) == ri) { 2531 mode = PMC_TO_MODE(pm); 2532 if (PMC_IS_VIRTUAL_MODE(mode)) 2533 return EEXIST; 2534 if (PMC_IS_SYSTEM_MODE(mode) && 2535 (int) PMC_TO_CPU(pm) == cpu) 2536 return EEXIST; 2537 } 2538 } 2539 2540 /* 2541 * We also shouldn't be the target of any PMC at this index 2542 * since otherwise a PMC_ATTACH to ourselves will fail. 2543 */ 2544 if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) 2545 if (pp->pp_pmcs[ri].pp_pmc) 2546 return EEXIST; 2547 2548 PMCDBG4(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", 2549 p, p->p_pid, p->p_comm, ri); 2550 2551 return 0; 2552 } 2553 2554 /* 2555 * Check if a given PMC at row index 'ri' can be currently used in 2556 * mode 'mode'. 2557 */ 2558 2559 static int 2560 pmc_can_allocate_row(int ri, enum pmc_mode mode) 2561 { 2562 enum pmc_disp disp; 2563 2564 sx_assert(&pmc_sx, SX_XLOCKED); 2565 2566 PMCDBG2(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); 2567 2568 if (PMC_IS_SYSTEM_MODE(mode)) 2569 disp = PMC_DISP_STANDALONE; 2570 else 2571 disp = PMC_DISP_THREAD; 2572 2573 /* 2574 * check disposition for PMC row 'ri': 2575 * 2576 * Expected disposition Row-disposition Result 2577 * 2578 * STANDALONE STANDALONE or FREE proceed 2579 * STANDALONE THREAD fail 2580 * THREAD THREAD or FREE proceed 2581 * THREAD STANDALONE fail 2582 */ 2583 2584 if (!PMC_ROW_DISP_IS_FREE(ri) && 2585 !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && 2586 !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) 2587 return EBUSY; 2588 2589 /* 2590 * All OK 2591 */ 2592 2593 PMCDBG2(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); 2594 2595 return 0; 2596 2597 } 2598 2599 /* 2600 * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. 
2601 */ 2602 2603 static struct pmc * 2604 pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) 2605 { 2606 struct pmc *pm; 2607 2608 KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, 2609 ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, 2610 PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); 2611 2612 LIST_FOREACH(pm, &po->po_pmcs, pm_next) 2613 if (pm->pm_id == pmcid) 2614 return pm; 2615 2616 return NULL; 2617 } 2618 2619 static int 2620 pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) 2621 { 2622 2623 struct pmc *pm, *opm; 2624 struct pmc_owner *po; 2625 struct pmc_process *pp; 2626 2627 PMCDBG1(PMC,FND,1, "find-pmc id=%d", pmcid); 2628 if (PMC_ID_TO_ROWINDEX(pmcid) >= md->pmd_npmc) 2629 return (EINVAL); 2630 2631 if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) { 2632 /* 2633 * In case of PMC_F_DESCENDANTS child processes we will not find 2634 * the current process in the owners hash list. Find the owner 2635 * process first and from there lookup the po. 2636 */ 2637 if ((pp = pmc_find_process_descriptor(curthread->td_proc, 2638 PMC_FLAG_NONE)) == NULL) { 2639 return ESRCH; 2640 } else { 2641 opm = pp->pp_pmcs[PMC_ID_TO_ROWINDEX(pmcid)].pp_pmc; 2642 if (opm == NULL) 2643 return ESRCH; 2644 if ((opm->pm_flags & (PMC_F_ATTACHED_TO_OWNER| 2645 PMC_F_DESCENDANTS)) != (PMC_F_ATTACHED_TO_OWNER| 2646 PMC_F_DESCENDANTS)) 2647 return ESRCH; 2648 po = opm->pm_owner; 2649 } 2650 } 2651 2652 if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) 2653 return EINVAL; 2654 2655 PMCDBG2(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); 2656 2657 *pmc = pm; 2658 return 0; 2659 } 2660 2661 /* 2662 * Start a PMC. 
2663 */ 2664 2665 static int 2666 pmc_start(struct pmc *pm) 2667 { 2668 enum pmc_mode mode; 2669 struct pmc_owner *po; 2670 struct pmc_binding pb; 2671 struct pmc_classdep *pcd; 2672 int adjri, error, cpu, ri; 2673 2674 KASSERT(pm != NULL, 2675 ("[pmc,%d] null pm", __LINE__)); 2676 2677 mode = PMC_TO_MODE(pm); 2678 ri = PMC_TO_ROWINDEX(pm); 2679 pcd = pmc_ri_to_classdep(md, ri, &adjri); 2680 2681 error = 0; 2682 2683 PMCDBG3(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); 2684 2685 po = pm->pm_owner; 2686 2687 /* 2688 * Disallow PMCSTART if a logfile is required but has not been 2689 * configured yet. 2690 */ 2691 if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && 2692 (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) 2693 return (EDOOFUS); /* programming error */ 2694 2695 /* 2696 * If this is a sampling mode PMC, log mapping information for 2697 * the kernel modules that are currently loaded. 2698 */ 2699 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 2700 pmc_log_kernel_mappings(pm); 2701 2702 if (PMC_IS_VIRTUAL_MODE(mode)) { 2703 2704 /* 2705 * If a PMCATTACH has never been done on this PMC, 2706 * attach it to its owner process. 2707 */ 2708 2709 if (LIST_EMPTY(&pm->pm_targets)) 2710 error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : 2711 pmc_attach_process(po->po_owner, pm); 2712 2713 /* 2714 * If the PMC is attached to its owner, then force a context 2715 * switch to ensure that the MD state gets set correctly. 2716 */ 2717 2718 if (error == 0) { 2719 pm->pm_state = PMC_STATE_RUNNING; 2720 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) 2721 pmc_force_context_switch(); 2722 } 2723 2724 return (error); 2725 } 2726 2727 2728 /* 2729 * A system-wide PMC. 2730 * 2731 * Add the owner to the global list if this is a system-wide 2732 * sampling PMC. 2733 */ 2734 2735 if (mode == PMC_MODE_SS) { 2736 /* 2737 * Log mapping information for all existing processes in the 2738 * system. Subsequent mappings are logged as they happen; 2739 * see pmc_process_mmap(). 
2740 */ 2741 if (po->po_logprocmaps == 0) { 2742 pmc_log_all_process_mappings(po); 2743 po->po_logprocmaps = 1; 2744 } 2745 po->po_sscount++; 2746 if (po->po_sscount == 1) { 2747 atomic_add_rel_int(&pmc_ss_count, 1); 2748 CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); 2749 PMCDBG1(PMC,OPS,1, "po=%p in global list", po); 2750 } 2751 } 2752 2753 /* 2754 * Move to the CPU associated with this 2755 * PMC, and start the hardware. 2756 */ 2757 2758 pmc_save_cpu_binding(&pb); 2759 2760 cpu = PMC_TO_CPU(pm); 2761 2762 if (!pmc_cpu_is_active(cpu)) 2763 return (ENXIO); 2764 2765 pmc_select_cpu(cpu); 2766 2767 /* 2768 * global PMCs are configured at allocation time 2769 * so write out the initial value and start the PMC. 2770 */ 2771 2772 pm->pm_state = PMC_STATE_RUNNING; 2773 2774 critical_enter(); 2775 if ((error = pcd->pcd_write_pmc(cpu, adjri, 2776 PMC_IS_SAMPLING_MODE(mode) ? 2777 pm->pm_sc.pm_reloadcount : 2778 pm->pm_sc.pm_initial)) == 0) { 2779 /* If a sampling mode PMC, reset stalled state. */ 2780 if (PMC_IS_SAMPLING_MODE(mode)) 2781 pm->pm_pcpu_state[cpu].pps_stalled = 0; 2782 2783 /* Indicate that we desire this to run. Start it. */ 2784 pm->pm_pcpu_state[cpu].pps_cpustate = 1; 2785 error = pcd->pcd_start_pmc(cpu, adjri); 2786 } 2787 critical_exit(); 2788 2789 pmc_restore_cpu_binding(&pb); 2790 2791 return (error); 2792 } 2793 2794 /* 2795 * Stop a PMC. 2796 */ 2797 2798 static int 2799 pmc_stop(struct pmc *pm) 2800 { 2801 struct pmc_owner *po; 2802 struct pmc_binding pb; 2803 struct pmc_classdep *pcd; 2804 int adjri, cpu, error, ri; 2805 2806 KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); 2807 2808 PMCDBG3(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, 2809 PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); 2810 2811 pm->pm_state = PMC_STATE_STOPPED; 2812 2813 /* 2814 * If the PMC is a virtual mode one, changing the state to 2815 * non-RUNNING is enough to ensure that the PMC never gets 2816 * scheduled. 
2817 * 2818 * If this PMC is current running on a CPU, then it will 2819 * handled correctly at the time its target process is context 2820 * switched out. 2821 */ 2822 2823 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) 2824 return 0; 2825 2826 /* 2827 * A system-mode PMC. Move to the CPU associated with 2828 * this PMC, and stop the hardware. We update the 2829 * 'initial count' so that a subsequent PMCSTART will 2830 * resume counting from the current hardware count. 2831 */ 2832 2833 pmc_save_cpu_binding(&pb); 2834 2835 cpu = PMC_TO_CPU(pm); 2836 2837 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 2838 ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); 2839 2840 if (!pmc_cpu_is_active(cpu)) 2841 return ENXIO; 2842 2843 pmc_select_cpu(cpu); 2844 2845 ri = PMC_TO_ROWINDEX(pm); 2846 pcd = pmc_ri_to_classdep(md, ri, &adjri); 2847 2848 pm->pm_pcpu_state[cpu].pps_cpustate = 0; 2849 critical_enter(); 2850 if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0) 2851 error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial); 2852 critical_exit(); 2853 2854 pmc_restore_cpu_binding(&pb); 2855 2856 po = pm->pm_owner; 2857 2858 /* remove this owner from the global list of SS PMC owners */ 2859 if (PMC_TO_MODE(pm) == PMC_MODE_SS) { 2860 po->po_sscount--; 2861 if (po->po_sscount == 0) { 2862 atomic_subtract_rel_int(&pmc_ss_count, 1); 2863 CK_LIST_REMOVE(po, po_ssnext); 2864 epoch_wait(global_epoch); 2865 PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po); 2866 } 2867 } 2868 2869 return (error); 2870 } 2871 2872 2873 #ifdef HWPMC_DEBUG 2874 static const char *pmc_op_to_name[] = { 2875 #undef __PMC_OP 2876 #define __PMC_OP(N, D) #N , 2877 __PMC_OPS() 2878 NULL 2879 }; 2880 #endif 2881 2882 /* 2883 * The syscall interface 2884 */ 2885 2886 #define PMC_GET_SX_XLOCK(...) 
do { \ 2887 sx_xlock(&pmc_sx); \ 2888 if (pmc_hook == NULL) { \ 2889 sx_xunlock(&pmc_sx); \ 2890 return __VA_ARGS__; \ 2891 } \ 2892 } while (0) 2893 2894 #define PMC_DOWNGRADE_SX() do { \ 2895 sx_downgrade(&pmc_sx); \ 2896 is_sx_downgraded = 1; \ 2897 } while (0) 2898 2899 static int 2900 pmc_syscall_handler(struct thread *td, void *syscall_args) 2901 { 2902 int error, is_sx_downgraded, op; 2903 struct pmc_syscall_args *c; 2904 void *pmclog_proc_handle; 2905 void *arg; 2906 2907 c = (struct pmc_syscall_args *)syscall_args; 2908 op = c->pmop_code; 2909 arg = c->pmop_data; 2910 /* PMC isn't set up yet */ 2911 if (pmc_hook == NULL) 2912 return (EINVAL); 2913 if (op == PMC_OP_CONFIGURELOG) { 2914 /* 2915 * We cannot create the logging process inside 2916 * pmclog_configure_log() because there is a LOR 2917 * between pmc_sx and process structure locks. 2918 * Instead, pre-create the process and ignite the loop 2919 * if everything is fine, otherwise direct the process 2920 * to exit. 2921 */ 2922 error = pmclog_proc_create(td, &pmclog_proc_handle); 2923 if (error != 0) 2924 goto done_syscall; 2925 } 2926 2927 PMC_GET_SX_XLOCK(ENOSYS); 2928 is_sx_downgraded = 0; 2929 PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, 2930 pmc_op_to_name[op], arg); 2931 2932 error = 0; 2933 counter_u64_add(pmc_stats.pm_syscalls, 1); 2934 2935 switch (op) { 2936 2937 2938 /* 2939 * Configure a log file. 2940 * 2941 * XXX This OP will be reworked. 
2942 */ 2943 2944 case PMC_OP_CONFIGURELOG: 2945 { 2946 struct proc *p; 2947 struct pmc *pm; 2948 struct pmc_owner *po; 2949 struct pmc_op_configurelog cl; 2950 2951 if ((error = copyin(arg, &cl, sizeof(cl))) != 0) { 2952 pmclog_proc_ignite(pmclog_proc_handle, NULL); 2953 break; 2954 } 2955 2956 /* mark this process as owning a log file */ 2957 p = td->td_proc; 2958 if ((po = pmc_find_owner_descriptor(p)) == NULL) 2959 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { 2960 pmclog_proc_ignite(pmclog_proc_handle, NULL); 2961 error = ENOMEM; 2962 break; 2963 } 2964 2965 /* 2966 * If a valid fd was passed in, try to configure that, 2967 * otherwise if 'fd' was less than zero and there was 2968 * a log file configured, flush its buffers and 2969 * de-configure it. 2970 */ 2971 if (cl.pm_logfd >= 0) { 2972 error = pmclog_configure_log(md, po, cl.pm_logfd); 2973 pmclog_proc_ignite(pmclog_proc_handle, error == 0 ? 2974 po : NULL); 2975 } else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { 2976 pmclog_proc_ignite(pmclog_proc_handle, NULL); 2977 error = pmclog_close(po); 2978 if (error == 0) { 2979 LIST_FOREACH(pm, &po->po_pmcs, pm_next) 2980 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && 2981 pm->pm_state == PMC_STATE_RUNNING) 2982 pmc_stop(pm); 2983 error = pmclog_deconfigure_log(po); 2984 } 2985 } else { 2986 pmclog_proc_ignite(pmclog_proc_handle, NULL); 2987 error = EINVAL; 2988 } 2989 } 2990 break; 2991 2992 /* 2993 * Flush a log file. 2994 */ 2995 2996 case PMC_OP_FLUSHLOG: 2997 { 2998 struct pmc_owner *po; 2999 3000 sx_assert(&pmc_sx, SX_XLOCKED); 3001 3002 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { 3003 error = EINVAL; 3004 break; 3005 } 3006 3007 error = pmclog_flush(po); 3008 } 3009 break; 3010 3011 /* 3012 * Close a log file. 
3013 */ 3014 3015 case PMC_OP_CLOSELOG: 3016 { 3017 struct pmc_owner *po; 3018 3019 sx_assert(&pmc_sx, SX_XLOCKED); 3020 3021 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { 3022 error = EINVAL; 3023 break; 3024 } 3025 3026 error = pmclog_close(po); 3027 } 3028 break; 3029 3030 /* 3031 * Retrieve hardware configuration. 3032 */ 3033 3034 case PMC_OP_GETCPUINFO: /* CPU information */ 3035 { 3036 struct pmc_op_getcpuinfo gci; 3037 struct pmc_classinfo *pci; 3038 struct pmc_classdep *pcd; 3039 int cl; 3040 3041 gci.pm_cputype = md->pmd_cputype; 3042 gci.pm_ncpu = pmc_cpu_max(); 3043 gci.pm_npmc = md->pmd_npmc; 3044 gci.pm_nclass = md->pmd_nclass; 3045 pci = gci.pm_classes; 3046 pcd = md->pmd_classdep; 3047 for (cl = 0; cl < md->pmd_nclass; cl++, pci++, pcd++) { 3048 pci->pm_caps = pcd->pcd_caps; 3049 pci->pm_class = pcd->pcd_class; 3050 pci->pm_width = pcd->pcd_width; 3051 pci->pm_num = pcd->pcd_num; 3052 } 3053 error = copyout(&gci, arg, sizeof(gci)); 3054 } 3055 break; 3056 3057 /* 3058 * Retrieve soft events list. 3059 */ 3060 case PMC_OP_GETDYNEVENTINFO: 3061 { 3062 enum pmc_class cl; 3063 enum pmc_event ev; 3064 struct pmc_op_getdyneventinfo *gei; 3065 struct pmc_dyn_event_descr dev; 3066 struct pmc_soft *ps; 3067 uint32_t nevent; 3068 3069 sx_assert(&pmc_sx, SX_LOCKED); 3070 3071 gei = (struct pmc_op_getdyneventinfo *) arg; 3072 3073 if ((error = copyin(&gei->pm_class, &cl, sizeof(cl))) != 0) 3074 break; 3075 3076 /* Only SOFT class is dynamic. 
*/ 3077 if (cl != PMC_CLASS_SOFT) { 3078 error = EINVAL; 3079 break; 3080 } 3081 3082 nevent = 0; 3083 for (ev = PMC_EV_SOFT_FIRST; (int)ev <= PMC_EV_SOFT_LAST; ev++) { 3084 ps = pmc_soft_ev_acquire(ev); 3085 if (ps == NULL) 3086 continue; 3087 bcopy(&ps->ps_ev, &dev, sizeof(dev)); 3088 pmc_soft_ev_release(ps); 3089 3090 error = copyout(&dev, 3091 &gei->pm_events[nevent], 3092 sizeof(struct pmc_dyn_event_descr)); 3093 if (error != 0) 3094 break; 3095 nevent++; 3096 } 3097 if (error != 0) 3098 break; 3099 3100 error = copyout(&nevent, &gei->pm_nevent, 3101 sizeof(nevent)); 3102 } 3103 break; 3104 3105 /* 3106 * Get module statistics 3107 */ 3108 3109 case PMC_OP_GETDRIVERSTATS: 3110 { 3111 struct pmc_op_getdriverstats gms; 3112 #define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field) 3113 CFETCH(gms, pmc_stats, pm_intr_ignored); 3114 CFETCH(gms, pmc_stats, pm_intr_processed); 3115 CFETCH(gms, pmc_stats, pm_intr_bufferfull); 3116 CFETCH(gms, pmc_stats, pm_syscalls); 3117 CFETCH(gms, pmc_stats, pm_syscall_errors); 3118 CFETCH(gms, pmc_stats, pm_buffer_requests); 3119 CFETCH(gms, pmc_stats, pm_buffer_requests_failed); 3120 CFETCH(gms, pmc_stats, pm_log_sweeps); 3121 #undef CFETCH 3122 error = copyout(&gms, arg, sizeof(gms)); 3123 } 3124 break; 3125 3126 3127 /* 3128 * Retrieve module version number 3129 */ 3130 3131 case PMC_OP_GETMODULEVERSION: 3132 { 3133 uint32_t cv, modv; 3134 3135 /* retrieve the client's idea of the ABI version */ 3136 if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) 3137 break; 3138 /* don't service clients newer than our driver */ 3139 modv = PMC_VERSION; 3140 if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { 3141 error = EPROGMISMATCH; 3142 break; 3143 } 3144 error = copyout(&modv, arg, sizeof(int)); 3145 } 3146 break; 3147 3148 3149 /* 3150 * Retrieve the state of all the PMCs on a given 3151 * CPU. 
3152 */ 3153 3154 case PMC_OP_GETPMCINFO: 3155 { 3156 int ari; 3157 struct pmc *pm; 3158 size_t pmcinfo_size; 3159 uint32_t cpu, n, npmc; 3160 struct pmc_owner *po; 3161 struct pmc_binding pb; 3162 struct pmc_classdep *pcd; 3163 struct pmc_info *p, *pmcinfo; 3164 struct pmc_op_getpmcinfo *gpi; 3165 3166 PMC_DOWNGRADE_SX(); 3167 3168 gpi = (struct pmc_op_getpmcinfo *) arg; 3169 3170 if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) 3171 break; 3172 3173 if (cpu >= pmc_cpu_max()) { 3174 error = EINVAL; 3175 break; 3176 } 3177 3178 if (!pmc_cpu_is_active(cpu)) { 3179 error = ENXIO; 3180 break; 3181 } 3182 3183 /* switch to CPU 'cpu' */ 3184 pmc_save_cpu_binding(&pb); 3185 pmc_select_cpu(cpu); 3186 3187 npmc = md->pmd_npmc; 3188 3189 pmcinfo_size = npmc * sizeof(struct pmc_info); 3190 pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK); 3191 3192 p = pmcinfo; 3193 3194 for (n = 0; n < md->pmd_npmc; n++, p++) { 3195 3196 pcd = pmc_ri_to_classdep(md, n, &ari); 3197 3198 KASSERT(pcd != NULL, 3199 ("[pmc,%d] null pcd ri=%d", __LINE__, n)); 3200 3201 if ((error = pcd->pcd_describe(cpu, ari, p, &pm)) != 0) 3202 break; 3203 3204 if (PMC_ROW_DISP_IS_STANDALONE(n)) 3205 p->pm_rowdisp = PMC_DISP_STANDALONE; 3206 else if (PMC_ROW_DISP_IS_THREAD(n)) 3207 p->pm_rowdisp = PMC_DISP_THREAD; 3208 else 3209 p->pm_rowdisp = PMC_DISP_FREE; 3210 3211 p->pm_ownerpid = -1; 3212 3213 if (pm == NULL) /* no PMC associated */ 3214 continue; 3215 3216 po = pm->pm_owner; 3217 3218 KASSERT(po->po_owner != NULL, 3219 ("[pmc,%d] pmc_owner had a null proc pointer", 3220 __LINE__)); 3221 3222 p->pm_ownerpid = po->po_owner->p_pid; 3223 p->pm_mode = PMC_TO_MODE(pm); 3224 p->pm_event = pm->pm_event; 3225 p->pm_flags = pm->pm_flags; 3226 3227 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 3228 p->pm_reloadcount = 3229 pm->pm_sc.pm_reloadcount; 3230 } 3231 3232 pmc_restore_cpu_binding(&pb); 3233 3234 /* now copy out the PMC info collected */ 3235 if (error == 0) 3236 error = copyout(pmcinfo, 
&gpi->pm_pmcs, pmcinfo_size); 3237 3238 free(pmcinfo, M_PMC); 3239 } 3240 break; 3241 3242 3243 /* 3244 * Set the administrative state of a PMC. I.e. whether 3245 * the PMC is to be used or not. 3246 */ 3247 3248 case PMC_OP_PMCADMIN: 3249 { 3250 int cpu, ri; 3251 enum pmc_state request; 3252 struct pmc_cpu *pc; 3253 struct pmc_hw *phw; 3254 struct pmc_op_pmcadmin pma; 3255 struct pmc_binding pb; 3256 3257 sx_assert(&pmc_sx, SX_XLOCKED); 3258 3259 KASSERT(td == curthread, 3260 ("[pmc,%d] td != curthread", __LINE__)); 3261 3262 error = priv_check(td, PRIV_PMC_MANAGE); 3263 if (error) 3264 break; 3265 3266 if ((error = copyin(arg, &pma, sizeof(pma))) != 0) 3267 break; 3268 3269 cpu = pma.pm_cpu; 3270 3271 if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { 3272 error = EINVAL; 3273 break; 3274 } 3275 3276 if (!pmc_cpu_is_active(cpu)) { 3277 error = ENXIO; 3278 break; 3279 } 3280 3281 request = pma.pm_state; 3282 3283 if (request != PMC_STATE_DISABLED && 3284 request != PMC_STATE_FREE) { 3285 error = EINVAL; 3286 break; 3287 } 3288 3289 ri = pma.pm_pmc; /* pmc id == row index */ 3290 if (ri < 0 || ri >= (int) md->pmd_npmc) { 3291 error = EINVAL; 3292 break; 3293 } 3294 3295 /* 3296 * We can't disable a PMC with a row-index allocated 3297 * for process virtual PMCs. 3298 */ 3299 3300 if (PMC_ROW_DISP_IS_THREAD(ri) && 3301 request == PMC_STATE_DISABLED) { 3302 error = EBUSY; 3303 break; 3304 } 3305 3306 /* 3307 * otherwise, this PMC on this CPU is either free or 3308 * in system-wide mode. 3309 */ 3310 3311 pmc_save_cpu_binding(&pb); 3312 pmc_select_cpu(cpu); 3313 3314 pc = pmc_pcpu[cpu]; 3315 phw = pc->pc_hwpmcs[ri]; 3316 3317 /* 3318 * XXX do we need some kind of 'forced' disable? 
3319 */ 3320 3321 if (phw->phw_pmc == NULL) { 3322 if (request == PMC_STATE_DISABLED && 3323 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { 3324 phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; 3325 PMC_MARK_ROW_STANDALONE(ri); 3326 } else if (request == PMC_STATE_FREE && 3327 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { 3328 phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; 3329 PMC_UNMARK_ROW_STANDALONE(ri); 3330 } 3331 /* other cases are a no-op */ 3332 } else 3333 error = EBUSY; 3334 3335 pmc_restore_cpu_binding(&pb); 3336 } 3337 break; 3338 3339 3340 /* 3341 * Allocate a PMC. 3342 */ 3343 3344 case PMC_OP_PMCALLOCATE: 3345 { 3346 int adjri, n; 3347 u_int cpu; 3348 uint32_t caps; 3349 struct pmc *pmc; 3350 enum pmc_mode mode; 3351 struct pmc_hw *phw; 3352 struct pmc_binding pb; 3353 struct pmc_classdep *pcd; 3354 struct pmc_op_pmcallocate pa; 3355 3356 if ((error = copyin(arg, &pa, sizeof(pa))) != 0) 3357 break; 3358 3359 caps = pa.pm_caps; 3360 mode = pa.pm_mode; 3361 cpu = pa.pm_cpu; 3362 3363 if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && 3364 mode != PMC_MODE_TS && mode != PMC_MODE_TC) || 3365 (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { 3366 error = EINVAL; 3367 break; 3368 } 3369 3370 /* 3371 * Virtual PMCs should only ask for a default CPU. 3372 * System mode PMCs need to specify a non-default CPU. 3373 */ 3374 3375 if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || 3376 (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { 3377 error = EINVAL; 3378 break; 3379 } 3380 3381 /* 3382 * Check that an inactive CPU is not being asked for. 3383 */ 3384 3385 if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) { 3386 error = ENXIO; 3387 break; 3388 } 3389 3390 /* 3391 * Refuse an allocation for a system-wide PMC if this 3392 * process has been jailed, or if this process lacks 3393 * super-user credentials and the sysctl tunable 3394 * 'security.bsd.unprivileged_syspmcs' is zero. 
3395 */ 3396 3397 if (PMC_IS_SYSTEM_MODE(mode)) { 3398 if (jailed(curthread->td_ucred)) { 3399 error = EPERM; 3400 break; 3401 } 3402 if (!pmc_unprivileged_syspmcs) { 3403 error = priv_check(curthread, 3404 PRIV_PMC_SYSTEM); 3405 if (error) 3406 break; 3407 } 3408 } 3409 3410 /* 3411 * Look for valid values for 'pm_flags' 3412 */ 3413 3414 if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | 3415 PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { 3416 error = EINVAL; 3417 break; 3418 } 3419 3420 /* process logging options are not allowed for system PMCs */ 3421 if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & 3422 (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { 3423 error = EINVAL; 3424 break; 3425 } 3426 3427 /* 3428 * All sampling mode PMCs need to be able to interrupt the 3429 * CPU. 3430 */ 3431 if (PMC_IS_SAMPLING_MODE(mode)) 3432 caps |= PMC_CAP_INTERRUPT; 3433 3434 /* A valid class specifier should have been passed in. */ 3435 for (n = 0; n < md->pmd_nclass; n++) 3436 if (md->pmd_classdep[n].pcd_class == pa.pm_class) 3437 break; 3438 if (n == md->pmd_nclass) { 3439 error = EINVAL; 3440 break; 3441 } 3442 3443 /* The requested PMC capabilities should be feasible. 
*/ 3444 if ((md->pmd_classdep[n].pcd_caps & caps) != caps) { 3445 error = EOPNOTSUPP; 3446 break; 3447 } 3448 3449 PMCDBG4(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", 3450 pa.pm_ev, caps, mode, cpu); 3451 3452 pmc = pmc_allocate_pmc_descriptor(); 3453 pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, 3454 PMC_ID_INVALID); 3455 pmc->pm_event = pa.pm_ev; 3456 pmc->pm_state = PMC_STATE_FREE; 3457 pmc->pm_caps = caps; 3458 pmc->pm_flags = pa.pm_flags; 3459 3460 /* switch thread to CPU 'cpu' */ 3461 pmc_save_cpu_binding(&pb); 3462 3463 #define PMC_IS_SHAREABLE_PMC(cpu, n) \ 3464 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ 3465 PMC_PHW_FLAG_IS_SHAREABLE) 3466 #define PMC_IS_UNALLOCATED(cpu, n) \ 3467 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) 3468 3469 if (PMC_IS_SYSTEM_MODE(mode)) { 3470 pmc_select_cpu(cpu); 3471 for (n = 0; n < (int) md->pmd_npmc; n++) { 3472 pcd = pmc_ri_to_classdep(md, n, &adjri); 3473 if (pmc_can_allocate_row(n, mode) == 0 && 3474 pmc_can_allocate_rowindex( 3475 curthread->td_proc, n, cpu) == 0 && 3476 (PMC_IS_UNALLOCATED(cpu, n) || 3477 PMC_IS_SHAREABLE_PMC(cpu, n)) && 3478 pcd->pcd_allocate_pmc(cpu, adjri, pmc, 3479 &pa) == 0) 3480 break; 3481 } 3482 } else { 3483 /* Process virtual mode */ 3484 for (n = 0; n < (int) md->pmd_npmc; n++) { 3485 pcd = pmc_ri_to_classdep(md, n, &adjri); 3486 if (pmc_can_allocate_row(n, mode) == 0 && 3487 pmc_can_allocate_rowindex( 3488 curthread->td_proc, n, 3489 PMC_CPU_ANY) == 0 && 3490 pcd->pcd_allocate_pmc(curthread->td_oncpu, 3491 adjri, pmc, &pa) == 0) 3492 break; 3493 } 3494 } 3495 3496 #undef PMC_IS_UNALLOCATED 3497 #undef PMC_IS_SHAREABLE_PMC 3498 3499 pmc_restore_cpu_binding(&pb); 3500 3501 if (n == (int) md->pmd_npmc) { 3502 pmc_destroy_pmc_descriptor(pmc); 3503 pmc = NULL; 3504 error = EINVAL; 3505 break; 3506 } 3507 3508 /* Fill in the correct value in the ID field */ 3509 pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); 3510 3511 PMCDBG5(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d 
-> pmcid=%x", 3512 pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); 3513 3514 /* Process mode PMCs with logging enabled need log files */ 3515 if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) 3516 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; 3517 3518 /* All system mode sampling PMCs require a log file */ 3519 if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) 3520 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; 3521 3522 /* 3523 * Configure global pmc's immediately 3524 */ 3525 3526 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { 3527 3528 pmc_save_cpu_binding(&pb); 3529 pmc_select_cpu(cpu); 3530 3531 phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; 3532 pcd = pmc_ri_to_classdep(md, n, &adjri); 3533 3534 if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || 3535 (error = pcd->pcd_config_pmc(cpu, adjri, pmc)) != 0) { 3536 (void) pcd->pcd_release_pmc(cpu, adjri, pmc); 3537 pmc_destroy_pmc_descriptor(pmc); 3538 pmc = NULL; 3539 pmc_restore_cpu_binding(&pb); 3540 error = EPERM; 3541 break; 3542 } 3543 3544 pmc_restore_cpu_binding(&pb); 3545 } 3546 3547 pmc->pm_state = PMC_STATE_ALLOCATED; 3548 3549 /* 3550 * mark row disposition 3551 */ 3552 3553 if (PMC_IS_SYSTEM_MODE(mode)) 3554 PMC_MARK_ROW_STANDALONE(n); 3555 else 3556 PMC_MARK_ROW_THREAD(n); 3557 3558 /* 3559 * Register this PMC with the current thread as its owner. 3560 */ 3561 3562 if ((error = 3563 pmc_register_owner(curthread->td_proc, pmc)) != 0) { 3564 pmc_release_pmc_descriptor(pmc); 3565 pmc_destroy_pmc_descriptor(pmc); 3566 pmc = NULL; 3567 break; 3568 } 3569 3570 /* 3571 * Return the allocated index. 3572 */ 3573 3574 pa.pm_pmcid = pmc->pm_id; 3575 3576 error = copyout(&pa, arg, sizeof(pa)); 3577 } 3578 break; 3579 3580 3581 /* 3582 * Attach a PMC to a process. 
3583 */ 3584 3585 case PMC_OP_PMCATTACH: 3586 { 3587 struct pmc *pm; 3588 struct proc *p; 3589 struct pmc_op_pmcattach a; 3590 3591 sx_assert(&pmc_sx, SX_XLOCKED); 3592 3593 if ((error = copyin(arg, &a, sizeof(a))) != 0) 3594 break; 3595 3596 if (a.pm_pid < 0) { 3597 error = EINVAL; 3598 break; 3599 } else if (a.pm_pid == 0) 3600 a.pm_pid = td->td_proc->p_pid; 3601 3602 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 3603 break; 3604 3605 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 3606 error = EINVAL; 3607 break; 3608 } 3609 3610 /* PMCs may be (re)attached only when allocated or stopped */ 3611 if (pm->pm_state == PMC_STATE_RUNNING) { 3612 error = EBUSY; 3613 break; 3614 } else if (pm->pm_state != PMC_STATE_ALLOCATED && 3615 pm->pm_state != PMC_STATE_STOPPED) { 3616 error = EINVAL; 3617 break; 3618 } 3619 3620 /* lookup pid */ 3621 if ((p = pfind(a.pm_pid)) == NULL) { 3622 error = ESRCH; 3623 break; 3624 } 3625 3626 /* 3627 * Ignore processes that are working on exiting. 3628 */ 3629 if (p->p_flag & P_WEXIT) { 3630 error = ESRCH; 3631 PROC_UNLOCK(p); /* pfind() returns a locked process */ 3632 break; 3633 } 3634 3635 /* 3636 * we are allowed to attach a PMC to a process if 3637 * we can debug it. 3638 */ 3639 error = p_candebug(curthread, p); 3640 3641 PROC_UNLOCK(p); 3642 3643 if (error == 0) 3644 error = pmc_attach_process(p, pm); 3645 } 3646 break; 3647 3648 3649 /* 3650 * Detach an attached PMC from a process. 
3651 */ 3652 3653 case PMC_OP_PMCDETACH: 3654 { 3655 struct pmc *pm; 3656 struct proc *p; 3657 struct pmc_op_pmcattach a; 3658 3659 if ((error = copyin(arg, &a, sizeof(a))) != 0) 3660 break; 3661 3662 if (a.pm_pid < 0) { 3663 error = EINVAL; 3664 break; 3665 } else if (a.pm_pid == 0) 3666 a.pm_pid = td->td_proc->p_pid; 3667 3668 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 3669 break; 3670 3671 if ((p = pfind(a.pm_pid)) == NULL) { 3672 error = ESRCH; 3673 break; 3674 } 3675 3676 /* 3677 * Treat processes that are in the process of exiting 3678 * as if they were not present. 3679 */ 3680 3681 if (p->p_flag & P_WEXIT) 3682 error = ESRCH; 3683 3684 PROC_UNLOCK(p); /* pfind() returns a locked process */ 3685 3686 if (error == 0) 3687 error = pmc_detach_process(p, pm); 3688 } 3689 break; 3690 3691 3692 /* 3693 * Retrieve the MSR number associated with the counter 3694 * 'pmc_id'. This allows processes to directly use RDPMC 3695 * instructions to read their PMCs, without the overhead of a 3696 * system call. 3697 */ 3698 3699 case PMC_OP_PMCGETMSR: 3700 { 3701 int adjri, ri; 3702 struct pmc *pm; 3703 struct pmc_target *pt; 3704 struct pmc_op_getmsr gm; 3705 struct pmc_classdep *pcd; 3706 3707 PMC_DOWNGRADE_SX(); 3708 3709 if ((error = copyin(arg, &gm, sizeof(gm))) != 0) 3710 break; 3711 3712 if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) 3713 break; 3714 3715 /* 3716 * The allocated PMC has to be a process virtual PMC, 3717 * i.e., of type MODE_T[CS]. Global PMCs can only be 3718 * read using the PMCREAD operation since they may be 3719 * allocated on a different CPU than the one we could 3720 * be running on at the time of the RDPMC instruction. 3721 * 3722 * The GETMSR operation is not allowed for PMCs that 3723 * are inherited across processes. 
3724 */ 3725 3726 if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || 3727 (pm->pm_flags & PMC_F_DESCENDANTS)) { 3728 error = EINVAL; 3729 break; 3730 } 3731 3732 /* 3733 * It only makes sense to use a RDPMC (or its 3734 * equivalent instruction on non-x86 architectures) on 3735 * a process that has allocated and attached a PMC to 3736 * itself. Conversely the PMC is only allowed to have 3737 * one process attached to it -- its owner. 3738 */ 3739 3740 if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || 3741 LIST_NEXT(pt, pt_next) != NULL || 3742 pt->pt_process->pp_proc != pm->pm_owner->po_owner) { 3743 error = EINVAL; 3744 break; 3745 } 3746 3747 ri = PMC_TO_ROWINDEX(pm); 3748 pcd = pmc_ri_to_classdep(md, ri, &adjri); 3749 3750 /* PMC class has no 'GETMSR' support */ 3751 if (pcd->pcd_get_msr == NULL) { 3752 error = ENOSYS; 3753 break; 3754 } 3755 3756 if ((error = (*pcd->pcd_get_msr)(adjri, &gm.pm_msr)) < 0) 3757 break; 3758 3759 if ((error = copyout(&gm, arg, sizeof(gm))) < 0) 3760 break; 3761 3762 /* 3763 * Mark our process as using MSRs. Update machine 3764 * state using a forced context switch. 3765 */ 3766 3767 pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; 3768 pmc_force_context_switch(); 3769 3770 } 3771 break; 3772 3773 /* 3774 * Release an allocated PMC 3775 */ 3776 3777 case PMC_OP_PMCRELEASE: 3778 { 3779 pmc_id_t pmcid; 3780 struct pmc *pm; 3781 struct pmc_owner *po; 3782 struct pmc_op_simple sp; 3783 3784 /* 3785 * Find PMC pointer for the named PMC. 3786 * 3787 * Use pmc_release_pmc_descriptor() to switch off the 3788 * PMC, remove all its target threads, and remove the 3789 * PMC from its owner's list. 3790 * 3791 * Remove the owner record if this is the last PMC 3792 * owned. 3793 * 3794 * Free up space. 
3795 */ 3796 3797 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3798 break; 3799 3800 pmcid = sp.pm_pmcid; 3801 3802 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3803 break; 3804 3805 po = pm->pm_owner; 3806 pmc_release_pmc_descriptor(pm); 3807 pmc_maybe_remove_owner(po); 3808 pmc_destroy_pmc_descriptor(pm); 3809 } 3810 break; 3811 3812 3813 /* 3814 * Read and/or write a PMC. 3815 */ 3816 3817 case PMC_OP_PMCRW: 3818 { 3819 int adjri; 3820 struct pmc *pm; 3821 uint32_t cpu, ri; 3822 pmc_value_t oldvalue; 3823 struct pmc_binding pb; 3824 struct pmc_op_pmcrw prw; 3825 struct pmc_classdep *pcd; 3826 struct pmc_op_pmcrw *pprw; 3827 3828 PMC_DOWNGRADE_SX(); 3829 3830 if ((error = copyin(arg, &prw, sizeof(prw))) != 0) 3831 break; 3832 3833 ri = 0; 3834 PMCDBG2(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, 3835 prw.pm_flags); 3836 3837 /* must have at least one flag set */ 3838 if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { 3839 error = EINVAL; 3840 break; 3841 } 3842 3843 /* locate pmc descriptor */ 3844 if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) 3845 break; 3846 3847 /* Can't read a PMC that hasn't been started. */ 3848 if (pm->pm_state != PMC_STATE_ALLOCATED && 3849 pm->pm_state != PMC_STATE_STOPPED && 3850 pm->pm_state != PMC_STATE_RUNNING) { 3851 error = EINVAL; 3852 break; 3853 } 3854 3855 /* writing a new value is allowed only for 'STOPPED' pmcs */ 3856 if (pm->pm_state == PMC_STATE_RUNNING && 3857 (prw.pm_flags & PMC_F_NEWVALUE)) { 3858 error = EBUSY; 3859 break; 3860 } 3861 3862 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { 3863 3864 /* 3865 * If this PMC is attached to its owner (i.e., 3866 * the process requesting this operation) and 3867 * is running, then attempt to get an 3868 * upto-date reading from hardware for a READ. 3869 * Writes are only allowed when the PMC is 3870 * stopped, so only update the saved value 3871 * field. 
3872 * 3873 * If the PMC is not running, or is not 3874 * attached to its owner, read/write to the 3875 * savedvalue field. 3876 */ 3877 3878 ri = PMC_TO_ROWINDEX(pm); 3879 pcd = pmc_ri_to_classdep(md, ri, &adjri); 3880 3881 mtx_pool_lock_spin(pmc_mtxpool, pm); 3882 cpu = curthread->td_oncpu; 3883 3884 if (prw.pm_flags & PMC_F_OLDVALUE) { 3885 if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && 3886 (pm->pm_state == PMC_STATE_RUNNING)) 3887 error = (*pcd->pcd_read_pmc)(cpu, adjri, 3888 &oldvalue); 3889 else 3890 oldvalue = pm->pm_gv.pm_savedvalue; 3891 } 3892 if (prw.pm_flags & PMC_F_NEWVALUE) 3893 pm->pm_gv.pm_savedvalue = prw.pm_value; 3894 3895 mtx_pool_unlock_spin(pmc_mtxpool, pm); 3896 3897 } else { /* System mode PMCs */ 3898 cpu = PMC_TO_CPU(pm); 3899 ri = PMC_TO_ROWINDEX(pm); 3900 pcd = pmc_ri_to_classdep(md, ri, &adjri); 3901 3902 if (!pmc_cpu_is_active(cpu)) { 3903 error = ENXIO; 3904 break; 3905 } 3906 3907 /* move this thread to CPU 'cpu' */ 3908 pmc_save_cpu_binding(&pb); 3909 pmc_select_cpu(cpu); 3910 3911 critical_enter(); 3912 /* save old value */ 3913 if (prw.pm_flags & PMC_F_OLDVALUE) 3914 if ((error = (*pcd->pcd_read_pmc)(cpu, adjri, 3915 &oldvalue))) 3916 goto error; 3917 /* write out new value */ 3918 if (prw.pm_flags & PMC_F_NEWVALUE) 3919 error = (*pcd->pcd_write_pmc)(cpu, adjri, 3920 prw.pm_value); 3921 error: 3922 critical_exit(); 3923 pmc_restore_cpu_binding(&pb); 3924 if (error) 3925 break; 3926 } 3927 3928 pprw = (struct pmc_op_pmcrw *) arg; 3929 3930 #ifdef HWPMC_DEBUG 3931 if (prw.pm_flags & PMC_F_NEWVALUE) 3932 PMCDBG3(PMC,OPS,2, "rw id=%d new %jx -> old %jx", 3933 ri, prw.pm_value, oldvalue); 3934 else if (prw.pm_flags & PMC_F_OLDVALUE) 3935 PMCDBG2(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue); 3936 #endif 3937 3938 /* return old value if requested */ 3939 if (prw.pm_flags & PMC_F_OLDVALUE) 3940 if ((error = copyout(&oldvalue, &pprw->pm_value, 3941 sizeof(prw.pm_value)))) 3942 break; 3943 3944 } 3945 break; 3946 3947 3948 /* 3949 * 
Set the sampling rate for a sampling mode PMC and the 3950 * initial count for a counting mode PMC. 3951 */ 3952 3953 case PMC_OP_PMCSETCOUNT: 3954 { 3955 struct pmc *pm; 3956 struct pmc_op_pmcsetcount sc; 3957 3958 PMC_DOWNGRADE_SX(); 3959 3960 if ((error = copyin(arg, &sc, sizeof(sc))) != 0) 3961 break; 3962 3963 if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) 3964 break; 3965 3966 if (pm->pm_state == PMC_STATE_RUNNING) { 3967 error = EBUSY; 3968 break; 3969 } 3970 3971 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 3972 pm->pm_sc.pm_reloadcount = sc.pm_count; 3973 else 3974 pm->pm_sc.pm_initial = sc.pm_count; 3975 } 3976 break; 3977 3978 3979 /* 3980 * Start a PMC. 3981 */ 3982 3983 case PMC_OP_PMCSTART: 3984 { 3985 pmc_id_t pmcid; 3986 struct pmc *pm; 3987 struct pmc_op_simple sp; 3988 3989 sx_assert(&pmc_sx, SX_XLOCKED); 3990 3991 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3992 break; 3993 3994 pmcid = sp.pm_pmcid; 3995 3996 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3997 break; 3998 3999 KASSERT(pmcid == pm->pm_id, 4000 ("[pmc,%d] pmcid %x != id %x", __LINE__, 4001 pm->pm_id, pmcid)); 4002 4003 if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ 4004 break; 4005 else if (pm->pm_state != PMC_STATE_STOPPED && 4006 pm->pm_state != PMC_STATE_ALLOCATED) { 4007 error = EINVAL; 4008 break; 4009 } 4010 4011 error = pmc_start(pm); 4012 } 4013 break; 4014 4015 4016 /* 4017 * Stop a PMC. 4018 */ 4019 4020 case PMC_OP_PMCSTOP: 4021 { 4022 pmc_id_t pmcid; 4023 struct pmc *pm; 4024 struct pmc_op_simple sp; 4025 4026 PMC_DOWNGRADE_SX(); 4027 4028 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 4029 break; 4030 4031 pmcid = sp.pm_pmcid; 4032 4033 /* 4034 * Mark the PMC as inactive and invoke the MD stop 4035 * routines if needed. 
4036 */ 4037 4038 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 4039 break; 4040 4041 KASSERT(pmcid == pm->pm_id, 4042 ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, 4043 pm->pm_id, pmcid)); 4044 4045 if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ 4046 break; 4047 else if (pm->pm_state != PMC_STATE_RUNNING) { 4048 error = EINVAL; 4049 break; 4050 } 4051 4052 error = pmc_stop(pm); 4053 } 4054 break; 4055 4056 4057 /* 4058 * Write a user supplied value to the log file. 4059 */ 4060 4061 case PMC_OP_WRITELOG: 4062 { 4063 struct pmc_op_writelog wl; 4064 struct pmc_owner *po; 4065 4066 PMC_DOWNGRADE_SX(); 4067 4068 if ((error = copyin(arg, &wl, sizeof(wl))) != 0) 4069 break; 4070 4071 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { 4072 error = EINVAL; 4073 break; 4074 } 4075 4076 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { 4077 error = EINVAL; 4078 break; 4079 } 4080 4081 error = pmclog_process_userlog(po, &wl); 4082 } 4083 break; 4084 4085 4086 default: 4087 error = EINVAL; 4088 break; 4089 } 4090 4091 if (is_sx_downgraded) 4092 sx_sunlock(&pmc_sx); 4093 else 4094 sx_xunlock(&pmc_sx); 4095 done_syscall: 4096 if (error) 4097 counter_u64_add(pmc_stats.pm_syscall_errors, 1); 4098 4099 return (error); 4100 } 4101 4102 /* 4103 * Helper functions 4104 */ 4105 4106 4107 /* 4108 * Mark the thread as needing callchain capture and post an AST. The 4109 * actual callchain capture will be done in a context where it is safe 4110 * to take page faults. 4111 */ 4112 4113 static void 4114 pmc_post_callchain_callback(void) 4115 { 4116 struct thread *td; 4117 4118 td = curthread; 4119 4120 /* 4121 * If there is multiple PMCs for the same interrupt ignore new post 4122 */ 4123 if (td->td_pflags & TDP_CALLCHAIN) 4124 return; 4125 4126 /* 4127 * Mark this thread as needing callchain capture. 4128 * `td->td_pflags' will be safe to touch because this thread 4129 * was in user space when it was interrupted. 
4130 */ 4131 td->td_pflags |= TDP_CALLCHAIN; 4132 4133 /* 4134 * Don't let this thread migrate between CPUs until callchain 4135 * capture completes. 4136 */ 4137 sched_pin(); 4138 4139 return; 4140 } 4141 4142 /* 4143 * Interrupt processing. 4144 * 4145 * Find a free slot in the per-cpu array of samples and capture the 4146 * current callchain there. If a sample was successfully added, a bit 4147 * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook 4148 * needs to be invoked from the clock handler. 4149 * 4150 * This function is meant to be called from an NMI handler. It cannot 4151 * use any of the locking primitives supplied by the OS. 4152 */ 4153 4154 int 4155 pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, 4156 int inuserspace) 4157 { 4158 int error, callchaindepth; 4159 struct thread *td; 4160 struct pmc_sample *ps; 4161 struct pmc_samplebuffer *psb; 4162 4163 error = 0; 4164 4165 /* 4166 * Allocate space for a sample buffer. 4167 */ 4168 psb = pmc_pcpu[cpu]->pc_sb[ring]; 4169 4170 ps = psb->ps_write; 4171 if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ 4172 pm->pm_pcpu_state[cpu].pps_stalled = 1; 4173 counter_u64_add(pmc_stats.pm_intr_bufferfull, 1); 4174 PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", 4175 cpu, pm, (void *) tf, inuserspace, 4176 (int) (psb->ps_write - psb->ps_samples), 4177 (int) (psb->ps_read - psb->ps_samples)); 4178 callchaindepth = 1; 4179 error = ENOMEM; 4180 goto done; 4181 } 4182 4183 4184 /* Fill in entry. 
*/ 4185 PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, 4186 (void *) tf, inuserspace, 4187 (int) (psb->ps_write - psb->ps_samples), 4188 (int) (psb->ps_read - psb->ps_samples)); 4189 4190 KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0, 4191 ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm, 4192 (unsigned long)counter_u64_fetch(pm->pm_runcount))); 4193 4194 counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */ 4195 4196 ps->ps_pmc = pm; 4197 if ((td = curthread) && td->td_proc) 4198 ps->ps_pid = td->td_proc->p_pid; 4199 else 4200 ps->ps_pid = -1; 4201 ps->ps_cpu = cpu; 4202 ps->ps_td = td; 4203 ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; 4204 4205 callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? 4206 pmc_callchaindepth : 1; 4207 4208 if (callchaindepth == 1) 4209 ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); 4210 else { 4211 /* 4212 * Kernel stack traversals can be done immediately, 4213 * while we defer to an AST for user space traversals. 4214 */ 4215 if (!inuserspace) { 4216 callchaindepth = 4217 pmc_save_kernel_callchain(ps->ps_pc, 4218 callchaindepth, tf); 4219 } else { 4220 pmc_post_callchain_callback(); 4221 callchaindepth = PMC_SAMPLE_INUSE; 4222 } 4223 } 4224 4225 ps->ps_nsamples = callchaindepth; /* mark entry as in use */ 4226 4227 /* increment write pointer, modulo ring buffer size */ 4228 ps++; 4229 if (ps == psb->ps_fence) 4230 psb->ps_write = psb->ps_samples; 4231 else 4232 psb->ps_write = ps; 4233 4234 done: 4235 /* mark CPU as needing processing */ 4236 if (callchaindepth != PMC_SAMPLE_INUSE) 4237 DPCPU_SET(pmc_sampled, 1); 4238 4239 return (error); 4240 } 4241 4242 /* 4243 * Capture a user call chain. This function will be called from ast() 4244 * before control returns to userland and before the process gets 4245 * rescheduled. 
4246 */ 4247 4248 static void 4249 pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) 4250 { 4251 struct pmc *pm; 4252 struct thread *td; 4253 struct pmc_sample *ps, *ps_end; 4254 struct pmc_samplebuffer *psb; 4255 #ifdef INVARIANTS 4256 int ncallchains; 4257 int nfree; 4258 #endif 4259 4260 psb = pmc_pcpu[cpu]->pc_sb[ring]; 4261 td = curthread; 4262 4263 KASSERT(td->td_pflags & TDP_CALLCHAIN, 4264 ("[pmc,%d] Retrieving callchain for thread that doesn't want it", 4265 __LINE__)); 4266 4267 #ifdef INVARIANTS 4268 ncallchains = 0; 4269 nfree = 0; 4270 #endif 4271 4272 /* 4273 * Iterate through all deferred callchain requests. 4274 * Walk from the current read pointer to the current 4275 * write pointer. 4276 */ 4277 4278 ps = psb->ps_read; 4279 ps_end = psb->ps_write; 4280 do { 4281 #ifdef INVARIANTS 4282 if ((ps->ps_pmc == NULL) || 4283 (ps->ps_pmc->pm_state != PMC_STATE_RUNNING)) 4284 nfree++; 4285 #endif 4286 if (ps->ps_nsamples != PMC_SAMPLE_INUSE) 4287 goto next; 4288 if (ps->ps_td != td) 4289 goto next; 4290 4291 KASSERT(ps->ps_cpu == cpu, 4292 ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__, 4293 ps->ps_cpu, PCPU_GET(cpuid))); 4294 4295 pm = ps->ps_pmc; 4296 4297 KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, 4298 ("[pmc,%d] Retrieving callchain for PMC that doesn't " 4299 "want it", __LINE__)); 4300 4301 KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, 4302 ("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount))); 4303 4304 /* 4305 * Retrieve the callchain and mark the sample buffer 4306 * as 'processable' by the timer tick sweep code. 
4307 */ 4308 ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, 4309 pmc_callchaindepth, tf); 4310 4311 #ifdef INVARIANTS 4312 ncallchains++; 4313 #endif 4314 4315 next: 4316 /* increment the pointer, modulo sample ring size */ 4317 if (++ps == psb->ps_fence) 4318 ps = psb->ps_samples; 4319 } while (ps != ps_end); 4320 4321 #ifdef INVARIANTS 4322 KASSERT(ncallchains > 0 || nfree > 0, 4323 ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, 4324 cpu)); 4325 #endif 4326 4327 KASSERT(td->td_pinned == 1, 4328 ("[pmc,%d] invalid td_pinned value", __LINE__)); 4329 sched_unpin(); /* Can migrate safely now. */ 4330 4331 /* mark CPU as needing processing */ 4332 DPCPU_SET(pmc_sampled, 1); 4333 } 4334 4335 /* 4336 * Process saved PC samples. 4337 */ 4338 4339 static void 4340 pmc_process_samples(int cpu, int ring) 4341 { 4342 struct pmc *pm; 4343 int adjri, n; 4344 struct thread *td; 4345 struct pmc_owner *po; 4346 struct pmc_sample *ps; 4347 struct pmc_classdep *pcd; 4348 struct pmc_samplebuffer *psb; 4349 4350 KASSERT(PCPU_GET(cpuid) == cpu, 4351 ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, 4352 PCPU_GET(cpuid), cpu)); 4353 4354 psb = pmc_pcpu[cpu]->pc_sb[ring]; 4355 4356 for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ 4357 4358 ps = psb->ps_read; 4359 if (ps->ps_nsamples == PMC_SAMPLE_FREE) 4360 break; 4361 4362 pm = ps->ps_pmc; 4363 4364 KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, 4365 ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm, 4366 (unsigned long)counter_u64_fetch(pm->pm_runcount))); 4367 4368 po = pm->pm_owner; 4369 4370 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), 4371 ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, 4372 pm, PMC_TO_MODE(pm))); 4373 4374 /* Ignore PMCs that have been switched off */ 4375 if (pm->pm_state != PMC_STATE_RUNNING) 4376 goto entrydone; 4377 4378 /* If there is a pending AST wait for completion */ 4379 if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { 4380 /* Need a rescan at a later 
time. */ 4381 DPCPU_SET(pmc_sampled, 1); 4382 break; 4383 } 4384 4385 PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, 4386 pm, ps->ps_nsamples, ps->ps_flags, 4387 (int) (psb->ps_write - psb->ps_samples), 4388 (int) (psb->ps_read - psb->ps_samples)); 4389 4390 /* 4391 * If this is a process-mode PMC that is attached to 4392 * its owner, and if the PC is in user mode, update 4393 * profiling statistics like timer-based profiling 4394 * would have done. 4395 */ 4396 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { 4397 if (ps->ps_flags & PMC_CC_F_USERSPACE) { 4398 td = FIRST_THREAD_IN_PROC(po->po_owner); 4399 addupc_intr(td, ps->ps_pc[0], 1); 4400 } 4401 goto entrydone; 4402 } 4403 4404 /* 4405 * Otherwise, this is either a sampling mode PMC that 4406 * is attached to a different process than its owner, 4407 * or a system-wide sampling PMC. Dispatch a log 4408 * entry to the PMC's owner process. 4409 */ 4410 pmclog_process_callchain(pm, ps); 4411 4412 entrydone: 4413 ps->ps_nsamples = 0; /* mark entry as free */ 4414 counter_u64_add(pm->pm_runcount, -1); 4415 4416 /* increment read pointer, modulo sample size */ 4417 if (++ps == psb->ps_fence) 4418 psb->ps_read = psb->ps_samples; 4419 else 4420 psb->ps_read = ps; 4421 } 4422 4423 counter_u64_add(pmc_stats.pm_log_sweeps, 1); 4424 4425 /* Do not re-enable stalled PMCs if we failed to process any samples */ 4426 if (n == 0) 4427 return; 4428 4429 /* 4430 * Restart any stalled sampling PMCs on this CPU. 4431 * 4432 * If the NMI handler sets the pm_stalled field of a PMC after 4433 * the check below, we'll end up processing the stalled PMC at 4434 * the next hardclock tick. 
4435 */ 4436 for (n = 0; n < md->pmd_npmc; n++) { 4437 pcd = pmc_ri_to_classdep(md, n, &adjri); 4438 KASSERT(pcd != NULL, 4439 ("[pmc,%d] null pcd ri=%d", __LINE__, n)); 4440 (void) (*pcd->pcd_get_config)(cpu,adjri,&pm); 4441 4442 if (pm == NULL || /* !cfg'ed */ 4443 pm->pm_state != PMC_STATE_RUNNING || /* !active */ 4444 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */ 4445 !pm->pm_pcpu_state[cpu].pps_cpustate || /* !desired */ 4446 !pm->pm_pcpu_state[cpu].pps_stalled) /* !stalled */ 4447 continue; 4448 4449 pm->pm_pcpu_state[cpu].pps_stalled = 0; 4450 (*pcd->pcd_start_pmc)(cpu, adjri); 4451 } 4452 } 4453 4454 /* 4455 * Event handlers. 4456 */ 4457 4458 /* 4459 * Handle a process exit. 4460 * 4461 * Remove this process from all hash tables. If this process 4462 * owned any PMCs, turn off those PMCs and deallocate them, 4463 * removing any associations with target processes. 4464 * 4465 * This function will be called by the last 'thread' of a 4466 * process. 4467 * 4468 * XXX This eventhandler gets called early in the exit process. 4469 * Consider using a 'hook' invocation from thread_exit() or equivalent 4470 * spot. Another negative is that kse_exit doesn't seem to call 4471 * exit1() [??]. 4472 * 4473 */ 4474 4475 static void 4476 pmc_process_exit(void *arg __unused, struct proc *p) 4477 { 4478 struct pmc *pm; 4479 int adjri, cpu; 4480 unsigned int ri; 4481 int is_using_hwpmcs; 4482 struct pmc_owner *po; 4483 struct pmc_process *pp; 4484 struct pmc_classdep *pcd; 4485 pmc_value_t newvalue, tmp; 4486 4487 PROC_LOCK(p); 4488 is_using_hwpmcs = p->p_flag & P_HWPMC; 4489 PROC_UNLOCK(p); 4490 4491 /* 4492 * Log a sysexit event to all SS PMC owners. 
4493 */ 4494 epoch_enter(global_epoch); 4495 CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 4496 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 4497 pmclog_process_sysexit(po, p->p_pid); 4498 epoch_exit(global_epoch); 4499 4500 if (!is_using_hwpmcs) 4501 return; 4502 4503 PMC_GET_SX_XLOCK(); 4504 PMCDBG3(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, 4505 p->p_comm); 4506 4507 /* 4508 * Since this code is invoked by the last thread in an exiting 4509 * process, we would have context switched IN at some prior 4510 * point. However, with PREEMPTION, kernel mode context 4511 * switches may happen any time, so we want to disable a 4512 * context switch OUT till we get any PMCs targeting this 4513 * process off the hardware. 4514 * 4515 * We also need to atomically remove this process' 4516 * entry from our target process hash table, using 4517 * PMC_FLAG_REMOVE. 4518 */ 4519 PMCDBG3(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid, 4520 p->p_comm); 4521 4522 critical_enter(); /* no preemption */ 4523 4524 cpu = curthread->td_oncpu; 4525 4526 if ((pp = pmc_find_process_descriptor(p, 4527 PMC_FLAG_REMOVE)) != NULL) { 4528 4529 PMCDBG2(PRC,EXT,2, 4530 "process-exit proc=%p pmc-process=%p", p, pp); 4531 4532 /* 4533 * The exiting process could the target of 4534 * some PMCs which will be running on 4535 * currently executing CPU. 4536 * 4537 * We need to turn these PMCs off like we 4538 * would do at context switch OUT time. 4539 */ 4540 for (ri = 0; ri < md->pmd_npmc; ri++) { 4541 4542 /* 4543 * Pick up the pmc pointer from hardware 4544 * state similar to the CSW_OUT code. 
4545 */ 4546 pm = NULL; 4547 4548 pcd = pmc_ri_to_classdep(md, ri, &adjri); 4549 4550 (void) (*pcd->pcd_get_config)(cpu, adjri, &pm); 4551 4552 PMCDBG2(PRC,EXT,2, "ri=%d pm=%p", ri, pm); 4553 4554 if (pm == NULL || 4555 !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) 4556 continue; 4557 4558 PMCDBG4(PRC,EXT,2, "ppmcs[%d]=%p pm=%p " 4559 "state=%d", ri, pp->pp_pmcs[ri].pp_pmc, 4560 pm, pm->pm_state); 4561 4562 KASSERT(PMC_TO_ROWINDEX(pm) == ri, 4563 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", 4564 __LINE__, PMC_TO_ROWINDEX(pm), ri)); 4565 4566 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, 4567 ("[pmc,%d] pm %p != pp_pmcs[%d] %p", 4568 __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); 4569 4570 KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, 4571 ("[pmc,%d] bad runcount ri %d rc %ld", 4572 __LINE__, ri, (unsigned long)counter_u64_fetch(pm->pm_runcount))); 4573 4574 /* 4575 * Change desired state, and then stop if not 4576 * stalled. This two-step dance should avoid 4577 * race conditions where an interrupt re-enables 4578 * the PMC after this code has already checked 4579 * the pm_stalled flag. 
4580 */ 4581 if (pm->pm_pcpu_state[cpu].pps_cpustate) { 4582 pm->pm_pcpu_state[cpu].pps_cpustate = 0; 4583 if (!pm->pm_pcpu_state[cpu].pps_stalled) { 4584 (void) pcd->pcd_stop_pmc(cpu, adjri); 4585 pcd->pcd_read_pmc(cpu, adjri, 4586 &newvalue); 4587 tmp = newvalue - 4588 PMC_PCPU_SAVED(cpu,ri); 4589 4590 mtx_pool_lock_spin(pmc_mtxpool, pm); 4591 pm->pm_gv.pm_savedvalue += tmp; 4592 pp->pp_pmcs[ri].pp_pmcval += tmp; 4593 mtx_pool_unlock_spin(pmc_mtxpool, pm); 4594 } 4595 } 4596 4597 counter_u64_add(pm->pm_runcount, -1); 4598 4599 KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0, 4600 ("[pmc,%d] runcount is %d", __LINE__, ri)); 4601 4602 (void) pcd->pcd_config_pmc(cpu, adjri, NULL); 4603 } 4604 4605 /* 4606 * Inform the MD layer of this pseudo "context switch 4607 * out" 4608 */ 4609 (void) md->pmd_switch_out(pmc_pcpu[cpu], pp); 4610 4611 critical_exit(); /* ok to be pre-empted now */ 4612 4613 /* 4614 * Unlink this process from the PMCs that are 4615 * targeting it. This will send a signal to 4616 * all PMC owner's whose PMCs are orphaned. 4617 * 4618 * Log PMC value at exit time if requested. 4619 */ 4620 for (ri = 0; ri < md->pmd_npmc; ri++) 4621 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { 4622 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && 4623 PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) 4624 pmclog_process_procexit(pm, pp); 4625 pmc_unlink_target_process(pm, pp); 4626 } 4627 free(pp, M_PMC); 4628 4629 } else 4630 critical_exit(); /* pp == NULL */ 4631 4632 4633 /* 4634 * If the process owned PMCs, free them up and free up 4635 * memory. 4636 */ 4637 if ((po = pmc_find_owner_descriptor(p)) != NULL) { 4638 pmc_remove_owner(po); 4639 pmc_destroy_owner_descriptor(po); 4640 } 4641 4642 sx_xunlock(&pmc_sx); 4643 } 4644 4645 /* 4646 * Handle a process fork. 4647 * 4648 * If the parent process 'p1' is under HWPMC monitoring, then copy 4649 * over any attached PMCs that have 'do_descendants' semantics. 
4650 */ 4651 4652 static void 4653 pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, 4654 int flags) 4655 { 4656 int is_using_hwpmcs; 4657 unsigned int ri; 4658 uint32_t do_descendants; 4659 struct pmc *pm; 4660 struct pmc_owner *po; 4661 struct pmc_process *ppnew, *ppold; 4662 4663 (void) flags; /* unused parameter */ 4664 4665 PROC_LOCK(p1); 4666 is_using_hwpmcs = p1->p_flag & P_HWPMC; 4667 PROC_UNLOCK(p1); 4668 4669 /* 4670 * If there are system-wide sampling PMCs active, we need to 4671 * log all fork events to their owner's logs. 4672 */ 4673 epoch_enter(global_epoch); 4674 CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 4675 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 4676 pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); 4677 epoch_exit(global_epoch); 4678 4679 if (!is_using_hwpmcs) 4680 return; 4681 4682 PMC_GET_SX_XLOCK(); 4683 PMCDBG4(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, 4684 p1->p_pid, p1->p_comm, newproc); 4685 4686 /* 4687 * If the parent process (curthread->td_proc) is a 4688 * target of any PMCs, look for PMCs that are to be 4689 * inherited, and link these into the new process 4690 * descriptor. 4691 */ 4692 if ((ppold = pmc_find_process_descriptor(curthread->td_proc, 4693 PMC_FLAG_NONE)) == NULL) 4694 goto done; /* nothing to do */ 4695 4696 do_descendants = 0; 4697 for (ri = 0; ri < md->pmd_npmc; ri++) 4698 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) 4699 do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; 4700 if (do_descendants == 0) /* nothing to do */ 4701 goto done; 4702 4703 /* allocate a descriptor for the new process */ 4704 if ((ppnew = pmc_find_process_descriptor(newproc, 4705 PMC_FLAG_ALLOCATE)) == NULL) 4706 goto done; 4707 4708 /* 4709 * Run through all PMCs that were targeting the old process 4710 * and which specified F_DESCENDANTS and attach them to the 4711 * new process. 4712 * 4713 * Log the fork event to all owners of PMCs attached to this 4714 * process, if not already logged. 
4715 */ 4716 for (ri = 0; ri < md->pmd_npmc; ri++) 4717 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && 4718 (pm->pm_flags & PMC_F_DESCENDANTS)) { 4719 pmc_link_target_process(pm, ppnew); 4720 po = pm->pm_owner; 4721 if (po->po_sscount == 0 && 4722 po->po_flags & PMC_PO_OWNS_LOGFILE) 4723 pmclog_process_procfork(po, p1->p_pid, 4724 newproc->p_pid); 4725 } 4726 4727 /* 4728 * Now mark the new process as being tracked by this driver. 4729 */ 4730 PROC_LOCK(newproc); 4731 newproc->p_flag |= P_HWPMC; 4732 PROC_UNLOCK(newproc); 4733 4734 done: 4735 sx_xunlock(&pmc_sx); 4736 } 4737 4738 static void 4739 pmc_kld_load(void *arg __unused, linker_file_t lf) 4740 { 4741 struct pmc_owner *po; 4742 4743 /* 4744 * Notify owners of system sampling PMCs about KLD operations. 4745 */ 4746 epoch_enter(global_epoch); 4747 CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 4748 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 4749 pmclog_process_map_in(po, (pid_t) -1, 4750 (uintfptr_t) lf->address, lf->filename); 4751 epoch_exit(global_epoch); 4752 4753 /* 4754 * TODO: Notify owners of (all) process-sampling PMCs too. 4755 */ 4756 } 4757 4758 static void 4759 pmc_kld_unload(void *arg __unused, const char *filename __unused, 4760 caddr_t address, size_t size) 4761 { 4762 struct pmc_owner *po; 4763 4764 epoch_enter(global_epoch); 4765 CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 4766 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 4767 pmclog_process_map_out(po, (pid_t) -1, 4768 (uintfptr_t) address, (uintfptr_t) address + size); 4769 epoch_exit(global_epoch); 4770 4771 /* 4772 * TODO: Notify owners of process-sampling PMCs. 
4773 */ 4774 } 4775 4776 /* 4777 * initialization 4778 */ 4779 static const char * 4780 pmc_name_of_pmcclass(enum pmc_class class) 4781 { 4782 4783 switch (class) { 4784 #undef __PMC_CLASS 4785 #define __PMC_CLASS(S,V,D) \ 4786 case PMC_CLASS_##S: \ 4787 return #S; 4788 __PMC_CLASSES(); 4789 default: 4790 return ("<unknown>"); 4791 } 4792 } 4793 4794 /* 4795 * Base class initializer: allocate structure and set default classes. 4796 */ 4797 struct pmc_mdep * 4798 pmc_mdep_alloc(int nclasses) 4799 { 4800 struct pmc_mdep *md; 4801 int n; 4802 4803 /* SOFT + md classes */ 4804 n = 1 + nclasses; 4805 md = malloc(sizeof(struct pmc_mdep) + n * 4806 sizeof(struct pmc_classdep), M_PMC, M_WAITOK|M_ZERO); 4807 md->pmd_nclass = n; 4808 4809 /* Add base class. */ 4810 pmc_soft_initialize(md); 4811 return md; 4812 } 4813 4814 void 4815 pmc_mdep_free(struct pmc_mdep *md) 4816 { 4817 pmc_soft_finalize(md); 4818 free(md, M_PMC); 4819 } 4820 4821 static int 4822 generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) 4823 { 4824 (void) pc; (void) pp; 4825 4826 return (0); 4827 } 4828 4829 static int 4830 generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) 4831 { 4832 (void) pc; (void) pp; 4833 4834 return (0); 4835 } 4836 4837 static struct pmc_mdep * 4838 pmc_generic_cpu_initialize(void) 4839 { 4840 struct pmc_mdep *md; 4841 4842 md = pmc_mdep_alloc(0); 4843 4844 md->pmd_cputype = PMC_CPU_GENERIC; 4845 4846 md->pmd_pcpu_init = NULL; 4847 md->pmd_pcpu_fini = NULL; 4848 md->pmd_switch_in = generic_switch_in; 4849 md->pmd_switch_out = generic_switch_out; 4850 4851 return (md); 4852 } 4853 4854 static void 4855 pmc_generic_cpu_finalize(struct pmc_mdep *md) 4856 { 4857 (void) md; 4858 } 4859 4860 4861 static int 4862 pmc_initialize(void) 4863 { 4864 int c, cpu, error, n, ri; 4865 unsigned int maxcpu, domain; 4866 struct pcpu *pc; 4867 struct pmc_binding pb; 4868 struct pmc_sample *ps; 4869 struct pmc_classdep *pcd; 4870 struct pmc_samplebuffer *sb; 4871 4872 md = NULL; 
4873 error = 0; 4874 4875 pmc_stats.pm_intr_ignored = counter_u64_alloc(M_WAITOK); 4876 pmc_stats.pm_intr_processed = counter_u64_alloc(M_WAITOK); 4877 pmc_stats.pm_intr_bufferfull = counter_u64_alloc(M_WAITOK); 4878 pmc_stats.pm_syscalls = counter_u64_alloc(M_WAITOK); 4879 pmc_stats.pm_syscall_errors = counter_u64_alloc(M_WAITOK); 4880 pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK); 4881 pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK); 4882 pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK); 4883 4884 #ifdef HWPMC_DEBUG 4885 /* parse debug flags first */ 4886 if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", 4887 pmc_debugstr, sizeof(pmc_debugstr))) 4888 pmc_debugflags_parse(pmc_debugstr, 4889 pmc_debugstr+strlen(pmc_debugstr)); 4890 #endif 4891 4892 PMCDBG1(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); 4893 4894 /* check kernel version */ 4895 if (pmc_kernel_version != PMC_VERSION) { 4896 if (pmc_kernel_version == 0) 4897 printf("hwpmc: this kernel has not been compiled with " 4898 "'options HWPMC_HOOKS'.\n"); 4899 else 4900 printf("hwpmc: kernel version (0x%x) does not match " 4901 "module version (0x%x).\n", pmc_kernel_version, 4902 PMC_VERSION); 4903 return EPROGMISMATCH; 4904 } 4905 4906 /* 4907 * check sysctl parameters 4908 */ 4909 4910 if (pmc_hashsize <= 0) { 4911 (void) printf("hwpmc: tunable \"hashsize\"=%d must be " 4912 "greater than zero.\n", pmc_hashsize); 4913 pmc_hashsize = PMC_HASH_SIZE; 4914 } 4915 4916 if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { 4917 (void) printf("hwpmc: tunable \"nsamples\"=%d out of " 4918 "range.\n", pmc_nsamples); 4919 pmc_nsamples = PMC_NSAMPLES; 4920 } 4921 4922 if (pmc_callchaindepth <= 0 || 4923 pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { 4924 (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " 4925 "range - using %d.\n", pmc_callchaindepth, 4926 PMC_CALLCHAIN_DEPTH_MAX); 4927 pmc_callchaindepth = PMC_CALLCHAIN_DEPTH_MAX; 4928 } 4929 4930 md = 
pmc_md_initialize(); 4931 if (md == NULL) { 4932 /* Default to generic CPU. */ 4933 md = pmc_generic_cpu_initialize(); 4934 if (md == NULL) 4935 return (ENOSYS); 4936 } 4937 4938 KASSERT(md->pmd_nclass >= 1 && md->pmd_npmc >= 1, 4939 ("[pmc,%d] no classes or pmcs", __LINE__)); 4940 4941 /* Compute the map from row-indices to classdep pointers. */ 4942 pmc_rowindex_to_classdep = malloc(sizeof(struct pmc_classdep *) * 4943 md->pmd_npmc, M_PMC, M_WAITOK|M_ZERO); 4944 4945 for (n = 0; n < md->pmd_npmc; n++) 4946 pmc_rowindex_to_classdep[n] = NULL; 4947 for (ri = c = 0; c < md->pmd_nclass; c++) { 4948 pcd = &md->pmd_classdep[c]; 4949 for (n = 0; n < pcd->pcd_num; n++, ri++) 4950 pmc_rowindex_to_classdep[ri] = pcd; 4951 } 4952 4953 KASSERT(ri == md->pmd_npmc, 4954 ("[pmc,%d] npmc miscomputed: ri=%d, md->npmc=%d", __LINE__, 4955 ri, md->pmd_npmc)); 4956 4957 maxcpu = pmc_cpu_max(); 4958 4959 /* allocate space for the per-cpu array */ 4960 pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), M_PMC, 4961 M_WAITOK|M_ZERO); 4962 4963 /* per-cpu 'saved values' for managing process-mode PMCs */ 4964 pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, 4965 M_PMC, M_WAITOK); 4966 4967 /* Perform CPU-dependent initialization. 
*/ 4968 pmc_save_cpu_binding(&pb); 4969 error = 0; 4970 for (cpu = 0; error == 0 && cpu < maxcpu; cpu++) { 4971 if (!pmc_cpu_is_active(cpu)) 4972 continue; 4973 pmc_select_cpu(cpu); 4974 pmc_pcpu[cpu] = malloc(sizeof(struct pmc_cpu) + 4975 md->pmd_npmc * sizeof(struct pmc_hw *), M_PMC, 4976 M_WAITOK|M_ZERO); 4977 if (md->pmd_pcpu_init) 4978 error = md->pmd_pcpu_init(md, cpu); 4979 for (n = 0; error == 0 && n < md->pmd_nclass; n++) 4980 error = md->pmd_classdep[n].pcd_pcpu_init(md, cpu); 4981 } 4982 pmc_restore_cpu_binding(&pb); 4983 4984 if (error) 4985 return (error); 4986 4987 /* allocate space for the sample array */ 4988 for (cpu = 0; cpu < maxcpu; cpu++) { 4989 if (!pmc_cpu_is_active(cpu)) 4990 continue; 4991 pc = pcpu_find(cpu); 4992 domain = pc->pc_domain; 4993 sb = malloc_domain(sizeof(struct pmc_samplebuffer) + 4994 pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, 4995 M_WAITOK|M_ZERO); 4996 sb->ps_read = sb->ps_write = sb->ps_samples; 4997 sb->ps_fence = sb->ps_samples + pmc_nsamples; 4998 4999 KASSERT(pmc_pcpu[cpu] != NULL, 5000 ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); 5001 5002 sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * 5003 sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); 5004 5005 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) 5006 ps->ps_pc = sb->ps_callchains + 5007 (n * pmc_callchaindepth); 5008 5009 pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb; 5010 5011 sb = malloc_domain(sizeof(struct pmc_samplebuffer) + 5012 pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, 5013 M_WAITOK|M_ZERO); 5014 sb->ps_read = sb->ps_write = sb->ps_samples; 5015 sb->ps_fence = sb->ps_samples + pmc_nsamples; 5016 5017 KASSERT(pmc_pcpu[cpu] != NULL, 5018 ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); 5019 5020 sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * 5021 sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); 5022 5023 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) 
5024 ps->ps_pc = sb->ps_callchains + 5025 (n * pmc_callchaindepth); 5026 5027 pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; 5028 } 5029 5030 /* allocate space for the row disposition array */ 5031 pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, 5032 M_PMC, M_WAITOK|M_ZERO); 5033 5034 /* mark all PMCs as available */ 5035 for (n = 0; n < (int) md->pmd_npmc; n++) 5036 PMC_MARK_ROW_FREE(n); 5037 5038 /* allocate thread hash tables */ 5039 pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, 5040 &pmc_ownerhashmask); 5041 5042 pmc_processhash = hashinit(pmc_hashsize, M_PMC, 5043 &pmc_processhashmask); 5044 mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", 5045 MTX_SPIN); 5046 5047 LIST_INIT(&pmc_ss_owners); 5048 pmc_ss_count = 0; 5049 5050 /* allocate a pool of spin mutexes */ 5051 pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, 5052 MTX_SPIN); 5053 5054 PMCDBG4(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " 5055 "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, 5056 pmc_processhash, pmc_processhashmask); 5057 5058 /* register process {exit,fork,exec} handlers */ 5059 pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, 5060 pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); 5061 pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, 5062 pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); 5063 5064 /* register kld event handlers */ 5065 pmc_kld_load_tag = EVENTHANDLER_REGISTER(kld_load, pmc_kld_load, 5066 NULL, EVENTHANDLER_PRI_ANY); 5067 pmc_kld_unload_tag = EVENTHANDLER_REGISTER(kld_unload, pmc_kld_unload, 5068 NULL, EVENTHANDLER_PRI_ANY); 5069 5070 /* initialize logging */ 5071 pmclog_initialize(); 5072 5073 /* set hook functions */ 5074 pmc_intr = md->pmd_intr; 5075 wmb(); 5076 pmc_hook = pmc_hook_handler; 5077 5078 if (error == 0) { 5079 printf(PMC_MODULE_NAME ":"); 5080 for (n = 0; n < (int) md->pmd_nclass; n++) { 5081 pcd = &md->pmd_classdep[n]; 5082 printf(" %s/%d/%d/0x%b", 5083 pmc_name_of_pmcclass(pcd->pcd_class), 5084 pcd->pcd_num, 5085 
pcd->pcd_width, 5086 pcd->pcd_caps, 5087 "\20" 5088 "\1INT\2USR\3SYS\4EDG\5THR" 5089 "\6REA\7WRI\10INV\11QUA\12PRC" 5090 "\13TAG\14CSC"); 5091 } 5092 printf("\n"); 5093 } 5094 5095 return (error); 5096 } 5097 5098 /* prepare to be unloaded */ 5099 static void 5100 pmc_cleanup(void) 5101 { 5102 int c, cpu; 5103 unsigned int maxcpu; 5104 struct pmc_ownerhash *ph; 5105 struct pmc_owner *po, *tmp; 5106 struct pmc_binding pb; 5107 #ifdef HWPMC_DEBUG 5108 struct pmc_processhash *prh; 5109 #endif 5110 5111 PMCDBG0(MOD,INI,0, "cleanup"); 5112 5113 /* switch off sampling */ 5114 CPU_FOREACH(cpu) 5115 DPCPU_ID_SET(cpu, pmc_sampled, 0); 5116 pmc_intr = NULL; 5117 5118 sx_xlock(&pmc_sx); 5119 if (pmc_hook == NULL) { /* being unloaded already */ 5120 sx_xunlock(&pmc_sx); 5121 return; 5122 } 5123 5124 pmc_hook = NULL; /* prevent new threads from entering module */ 5125 5126 /* deregister event handlers */ 5127 EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); 5128 EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); 5129 EVENTHANDLER_DEREGISTER(kld_load, pmc_kld_load_tag); 5130 EVENTHANDLER_DEREGISTER(kld_unload, pmc_kld_unload_tag); 5131 5132 /* send SIGBUS to all owner threads, free up allocations */ 5133 if (pmc_ownerhash) 5134 for (ph = pmc_ownerhash; 5135 ph <= &pmc_ownerhash[pmc_ownerhashmask]; 5136 ph++) { 5137 LIST_FOREACH_SAFE(po, ph, po_next, tmp) { 5138 pmc_remove_owner(po); 5139 5140 /* send SIGBUS to owner processes */ 5141 PMCDBG3(MOD,INI,2, "cleanup signal proc=%p " 5142 "(%d, %s)", po->po_owner, 5143 po->po_owner->p_pid, 5144 po->po_owner->p_comm); 5145 5146 PROC_LOCK(po->po_owner); 5147 kern_psignal(po->po_owner, SIGBUS); 5148 PROC_UNLOCK(po->po_owner); 5149 5150 pmc_destroy_owner_descriptor(po); 5151 } 5152 } 5153 5154 /* reclaim allocated data structures */ 5155 if (pmc_mtxpool) 5156 mtx_pool_destroy(&pmc_mtxpool); 5157 5158 mtx_destroy(&pmc_processhash_mtx); 5159 if (pmc_processhash) { 5160 #ifdef HWPMC_DEBUG 5161 struct pmc_process *pp; 5162 5163 
PMCDBG0(MOD,INI,3, "destroy process hash"); 5164 for (prh = pmc_processhash; 5165 prh <= &pmc_processhash[pmc_processhashmask]; 5166 prh++) 5167 LIST_FOREACH(pp, prh, pp_next) 5168 PMCDBG1(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); 5169 #endif 5170 5171 hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); 5172 pmc_processhash = NULL; 5173 } 5174 5175 if (pmc_ownerhash) { 5176 PMCDBG0(MOD,INI,3, "destroy owner hash"); 5177 hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); 5178 pmc_ownerhash = NULL; 5179 } 5180 5181 KASSERT(LIST_EMPTY(&pmc_ss_owners), 5182 ("[pmc,%d] Global SS owner list not empty", __LINE__)); 5183 KASSERT(pmc_ss_count == 0, 5184 ("[pmc,%d] Global SS count not empty", __LINE__)); 5185 5186 /* do processor and pmc-class dependent cleanup */ 5187 maxcpu = pmc_cpu_max(); 5188 5189 PMCDBG0(MOD,INI,3, "md cleanup"); 5190 if (md) { 5191 pmc_save_cpu_binding(&pb); 5192 for (cpu = 0; cpu < maxcpu; cpu++) { 5193 PMCDBG2(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", 5194 cpu, pmc_pcpu[cpu]); 5195 if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL) 5196 continue; 5197 pmc_select_cpu(cpu); 5198 for (c = 0; c < md->pmd_nclass; c++) 5199 md->pmd_classdep[c].pcd_pcpu_fini(md, cpu); 5200 if (md->pmd_pcpu_fini) 5201 md->pmd_pcpu_fini(md, cpu); 5202 } 5203 5204 if (md->pmd_cputype == PMC_CPU_GENERIC) 5205 pmc_generic_cpu_finalize(md); 5206 else 5207 pmc_md_finalize(md); 5208 5209 pmc_mdep_free(md); 5210 md = NULL; 5211 pmc_restore_cpu_binding(&pb); 5212 } 5213 5214 /* Free per-cpu descriptors. 
*/ 5215 for (cpu = 0; cpu < maxcpu; cpu++) { 5216 if (!pmc_cpu_is_active(cpu)) 5217 continue; 5218 KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_HR] != NULL, 5219 ("[pmc,%d] Null hw cpu sample buffer cpu=%d", __LINE__, 5220 cpu)); 5221 KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, 5222 ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, 5223 cpu)); 5224 free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); 5225 free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); 5226 free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); 5227 free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); 5228 free_domain(pmc_pcpu[cpu], M_PMC); 5229 } 5230 5231 free(pmc_pcpu, M_PMC); 5232 pmc_pcpu = NULL; 5233 5234 free(pmc_pcpu_saved, M_PMC); 5235 pmc_pcpu_saved = NULL; 5236 5237 if (pmc_pmcdisp) { 5238 free(pmc_pmcdisp, M_PMC); 5239 pmc_pmcdisp = NULL; 5240 } 5241 5242 if (pmc_rowindex_to_classdep) { 5243 free(pmc_rowindex_to_classdep, M_PMC); 5244 pmc_rowindex_to_classdep = NULL; 5245 } 5246 5247 pmclog_shutdown(); 5248 counter_u64_free(pmc_stats.pm_intr_ignored); 5249 counter_u64_free(pmc_stats.pm_intr_processed); 5250 counter_u64_free(pmc_stats.pm_intr_bufferfull); 5251 counter_u64_free(pmc_stats.pm_syscalls); 5252 counter_u64_free(pmc_stats.pm_syscall_errors); 5253 counter_u64_free(pmc_stats.pm_buffer_requests); 5254 counter_u64_free(pmc_stats.pm_buffer_requests_failed); 5255 counter_u64_free(pmc_stats.pm_log_sweeps); 5256 sx_xunlock(&pmc_sx); /* we are done */ 5257 } 5258 5259 /* 5260 * The function called at load/unload. 
5261 */ 5262 5263 static int 5264 load (struct module *module __unused, int cmd, void *arg __unused) 5265 { 5266 int error; 5267 5268 error = 0; 5269 5270 switch (cmd) { 5271 case MOD_LOAD : 5272 /* initialize the subsystem */ 5273 error = pmc_initialize(); 5274 if (error != 0) 5275 break; 5276 PMCDBG2(MOD,INI,1, "syscall=%d maxcpu=%d", 5277 pmc_syscall_num, pmc_cpu_max()); 5278 break; 5279 5280 5281 case MOD_UNLOAD : 5282 case MOD_SHUTDOWN: 5283 pmc_cleanup(); 5284 PMCDBG0(MOD,INI,1, "unloaded"); 5285 break; 5286 5287 default : 5288 error = EINVAL; /* XXX should panic(9) */ 5289 break; 5290 } 5291 5292 return error; 5293 } 5294