1 /*- 2 * Copyright (c) 2003-2007 Joseph Koshy 3 * Copyright (c) 2007 The FreeBSD Foundation 4 * All rights reserved. 5 * 6 * Portions of this software were developed by A. Joseph Koshy under 7 * sponsorship from the FreeBSD Foundation and Google, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/eventhandler.h> 37 #include <sys/jail.h> 38 #include <sys/kernel.h> 39 #include <sys/kthread.h> 40 #include <sys/limits.h> 41 #include <sys/lock.h> 42 #include <sys/malloc.h> 43 #include <sys/module.h> 44 #include <sys/mutex.h> 45 #include <sys/pmc.h> 46 #include <sys/pmckern.h> 47 #include <sys/pmclog.h> 48 #include <sys/priv.h> 49 #include <sys/proc.h> 50 #include <sys/queue.h> 51 #include <sys/resourcevar.h> 52 #include <sys/sched.h> 53 #include <sys/signalvar.h> 54 #include <sys/smp.h> 55 #include <sys/sx.h> 56 #include <sys/sysctl.h> 57 #include <sys/sysent.h> 58 #include <sys/systm.h> 59 #include <sys/vnode.h> 60 61 #include <sys/linker.h> /* needs to be after <sys/malloc.h> */ 62 63 #include <machine/atomic.h> 64 #include <machine/md_var.h> 65 66 /* 67 * Types 68 */ 69 70 enum pmc_flags { 71 PMC_FLAG_NONE = 0x00, /* do nothing */ 72 PMC_FLAG_REMOVE = 0x01, /* atomically remove entry from hash */ 73 PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */ 74 }; 75 76 /* 77 * The offset in sysent where the syscall is allocated. 78 */ 79 80 static int pmc_syscall_num = NO_SYSCALL; 81 struct pmc_cpu **pmc_pcpu; /* per-cpu state */ 82 pmc_value_t *pmc_pcpu_saved; /* saved PMC values: CSW handling */ 83 84 #define PMC_PCPU_SAVED(C,R) pmc_pcpu_saved[(R) + md->pmd_npmc*(C)] 85 86 struct mtx_pool *pmc_mtxpool; 87 static int *pmc_pmcdisp; /* PMC row dispositions */ 88 89 #define PMC_ROW_DISP_IS_FREE(R) (pmc_pmcdisp[(R)] == 0) 90 #define PMC_ROW_DISP_IS_THREAD(R) (pmc_pmcdisp[(R)] > 0) 91 #define PMC_ROW_DISP_IS_STANDALONE(R) (pmc_pmcdisp[(R)] < 0) 92 93 #define PMC_MARK_ROW_FREE(R) do { \ 94 pmc_pmcdisp[(R)] = 0; \ 95 } while (0) 96 97 #define PMC_MARK_ROW_STANDALONE(R) do { \ 98 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ 99 __LINE__)); \ 100 atomic_add_int(&pmc_pmcdisp[(R)], -1); \ 101 KASSERT(pmc_pmcdisp[(R)] >= (-mp_ncpus), ("[pmc,%d] row " \ 102 "disposition error", __LINE__)); \ 103 } while (0) 104 105 #define PMC_UNMARK_ROW_STANDALONE(R) do { \ 106 atomic_add_int(&pmc_pmcdisp[(R)], 1); \ 107 KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \ 108 __LINE__)); \ 109 } while (0) 110 111 #define PMC_MARK_ROW_THREAD(R) do { \ 112 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ 113 __LINE__)); \ 114 atomic_add_int(&pmc_pmcdisp[(R)], 1); \ 115 } while (0) 116 117 #define PMC_UNMARK_ROW_THREAD(R) do { \ 118 atomic_add_int(&pmc_pmcdisp[(R)], -1); \ 119 KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \ 120 __LINE__)); \ 121 } while (0) 122 123 124 /* various event handlers */ 125 static eventhandler_tag pmc_exit_tag, pmc_fork_tag; 126 127 /* Module statistics */ 128 struct pmc_op_getdriverstats pmc_stats; 129 130 /* Machine/processor dependent operations */ 131 struct pmc_mdep *md; 132 133 /* 134 * Hash tables mapping owner processes and target threads to PMCs. 135 */ 136 137 struct mtx pmc_processhash_mtx; /* spin mutex */ 138 static u_long pmc_processhashmask; 139 static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash; 140 141 /* 142 * Hash table of PMC owner descriptors. This table is protected by 143 * the shared PMC "sx" lock. 144 */ 145 146 static u_long pmc_ownerhashmask; 147 static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash; 148 149 /* 150 * List of PMC owners with system-wide sampling PMCs. 151 */ 152 153 static LIST_HEAD(, pmc_owner) pmc_ss_owners; 154 155 156 /* 157 * Prototypes 158 */ 159 160 #ifdef DEBUG 161 static int pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS); 162 static int pmc_debugflags_parse(char *newstr, char *fence); 163 #endif 164 165 static int load(struct module *module, int cmd, void *arg); 166 static int pmc_attach_process(struct proc *p, struct pmc *pm); 167 static struct pmc *pmc_allocate_pmc_descriptor(void); 168 static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p); 169 static int pmc_attach_one_process(struct proc *p, struct pmc *pm); 170 static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, 171 int cpu); 172 static int pmc_can_attach(struct pmc *pm, struct proc *p); 173 static void pmc_capture_user_callchain(int cpu, struct trapframe *tf); 174 static void pmc_cleanup(void); 175 static int pmc_detach_process(struct proc *p, struct pmc *pm); 176 static int pmc_detach_one_process(struct proc *p, struct pmc *pm, 177 int flags); 178 static void pmc_destroy_owner_descriptor(struct pmc_owner *po); 179 static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p); 180 static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm); 181 static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, 182 pmc_id_t pmc); 183 static struct pmc_process *pmc_find_process_descriptor(struct proc *p, 184 uint32_t mode); 185 static void pmc_force_context_switch(void); 186 static void pmc_link_target_process(struct pmc *pm, 187 struct pmc_process *pp); 188 static void pmc_log_all_process_mappings(struct pmc_owner *po); 189 static void pmc_log_kernel_mappings(struct pmc *pm); 190 static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); 191 static void pmc_maybe_remove_owner(struct pmc_owner *po); 192 static void pmc_process_csw_in(struct thread *td); 193 static void pmc_process_csw_out(struct thread *td); 194 static void pmc_process_exit(void *arg, struct proc *p); 195 static void pmc_process_fork(void *arg, struct proc *p1, 196 struct proc *p2, int n); 197 static void pmc_process_samples(int cpu); 198 static void pmc_release_pmc_descriptor(struct pmc *pmc); 199 static void pmc_remove_owner(struct pmc_owner *po); 200 static void pmc_remove_process_descriptor(struct pmc_process *pp); 201 static void pmc_restore_cpu_binding(struct pmc_binding *pb); 202 static void pmc_save_cpu_binding(struct pmc_binding *pb); 203 static void pmc_select_cpu(int cpu); 204 static int pmc_start(struct pmc *pm); 205 static int pmc_stop(struct pmc *pm); 206 static int pmc_syscall_handler(struct thread *td, void *syscall_args); 207 static void pmc_unlink_target_process(struct pmc *pmc, 208 struct pmc_process *pp); 209 210 /* 211 * Kernel tunables and sysctl(8) interface. 212 */ 213 214 SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters"); 215 216 static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; 217 TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth); 218 SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD, 219 &pmc_callchaindepth, 0, "depth of call chain records"); 220 221 #ifdef DEBUG 222 struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; 223 char pmc_debugstr[PMC_DEBUG_STRSIZE]; 224 TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr, 225 sizeof(pmc_debugstr)); 226 SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags, 227 CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_TUN, 228 0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags"); 229 #endif 230 231 /* 232 * kern.hwpmc.hashrows -- determines the number of rows in the 233 * of the hash table used to look up threads 234 */ 235 236 static int pmc_hashsize = PMC_HASH_SIZE; 237 TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "hashsize", &pmc_hashsize); 238 SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD, 239 &pmc_hashsize, 0, "rows in hash tables"); 240 241 /* 242 * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU 243 */ 244 245 static int pmc_nsamples = PMC_NSAMPLES; 246 TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples); 247 SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD, 248 &pmc_nsamples, 0, "number of PC samples per CPU"); 249 250 251 /* 252 * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool. 253 */ 254 255 static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE; 256 TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "mtxpoolsize", &pmc_mtxpool_size); 257 SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_TUN|CTLFLAG_RD, 258 &pmc_mtxpool_size, 0, "size of spin mutex pool"); 259 260 261 /* 262 * security.bsd.unprivileged_syspmcs -- allow non-root processes to 263 * allocate system-wide PMCs. 264 * 265 * Allowing unprivileged processes to allocate system PMCs is convenient 266 * if system-wide measurements need to be taken concurrently with other 267 * per-process measurements. This feature is turned off by default. 268 */ 269 270 static int pmc_unprivileged_syspmcs = 0; 271 TUNABLE_INT("security.bsd.unprivileged_syspmcs", &pmc_unprivileged_syspmcs); 272 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RW, 273 &pmc_unprivileged_syspmcs, 0, 274 "allow unprivileged process to allocate system PMCs"); 275 276 /* 277 * Hash function. Discard the lower 2 bits of the pointer since 278 * these are always zero for our uses. The hash multiplier is 279 * round((2^LONG_BIT) * ((sqrt(5)-1)/2)). 280 */ 281 282 #if LONG_BIT == 64 283 #define _PMC_HM 11400714819323198486u 284 #elif LONG_BIT == 32 285 #define _PMC_HM 2654435769u 286 #else 287 #error Must know the size of 'long' to compile 288 #endif 289 290 #define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M)) 291 292 /* 293 * Syscall structures 294 */ 295 296 /* The `sysent' for the new syscall */ 297 static struct sysent pmc_sysent = { 298 2, /* sy_narg */ 299 pmc_syscall_handler /* sy_call */ 300 }; 301 302 static struct syscall_module_data pmc_syscall_mod = { 303 load, 304 NULL, 305 &pmc_syscall_num, 306 &pmc_sysent, 307 { 0, NULL } 308 }; 309 310 static moduledata_t pmc_mod = { 311 PMC_MODULE_NAME, 312 syscall_module_handler, 313 &pmc_syscall_mod 314 }; 315 316 DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY); 317 MODULE_VERSION(pmc, PMC_VERSION); 318 319 #ifdef DEBUG 320 enum pmc_dbgparse_state { 321 PMCDS_WS, /* in whitespace */ 322 PMCDS_MAJOR, /* seen a major keyword */ 323 PMCDS_MINOR 324 }; 325 326 static int 327 pmc_debugflags_parse(char *newstr, char *fence) 328 { 329 char c, *p, *q; 330 struct pmc_debugflags *tmpflags; 331 int error, found, *newbits, tmp; 332 size_t kwlen; 333 334 MALLOC(tmpflags, struct pmc_debugflags *, sizeof(*tmpflags), 335 M_PMC, M_WAITOK|M_ZERO); 336 337 p = newstr; 338 error = 0; 339 340 for (; p < fence && (c = *p); p++) { 341 342 /* skip white space */ 343 if (c == ' ' || c == '\t') 344 continue; 345 346 /* look for a keyword followed by "=" */ 347 for (q = p; p < fence && (c = *p) && c != '='; p++) 348 ; 349 if (c != '=') { 350 error = EINVAL; 351 goto done; 352 } 353 354 kwlen = p - q; 355 newbits = NULL; 356 357 /* lookup flag group name */ 358 #define DBG_SET_FLAG_MAJ(S,F) \ 359 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ 360 newbits = &tmpflags->pdb_ ## F; 361 362 DBG_SET_FLAG_MAJ("cpu", CPU); 363 DBG_SET_FLAG_MAJ("csw", CSW); 364 DBG_SET_FLAG_MAJ("logging", LOG); 365 DBG_SET_FLAG_MAJ("module", MOD); 366 DBG_SET_FLAG_MAJ("md", MDP); 367 DBG_SET_FLAG_MAJ("owner", OWN); 368 DBG_SET_FLAG_MAJ("pmc", PMC); 369 DBG_SET_FLAG_MAJ("process", PRC); 370 DBG_SET_FLAG_MAJ("sampling", SAM); 371 372 if (newbits == NULL) { 373 error = EINVAL; 374 goto done; 375 } 376 377 p++; /* skip the '=' */ 378 379 /* Now parse the individual flags */ 380 tmp = 0; 381 newflag: 382 for (q = p; p < fence && (c = *p); p++) 383 if (c == ' ' || c == '\t' || c == ',') 384 break; 385 386 /* p == fence or c == ws or c == "," or c == 0 */ 387 388 if ((kwlen = p - q) == 0) { 389 *newbits = tmp; 390 continue; 391 } 392 393 found = 0; 394 #define DBG_SET_FLAG_MIN(S,F) \ 395 if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \ 396 tmp |= found = (1 << PMC_DEBUG_MIN_ ## F) 397 398 /* a '*' denotes all possible flags in the group */ 399 if (kwlen == 1 && *q == '*') 400 tmp = found = ~0; 401 /* look for individual flag names */ 402 DBG_SET_FLAG_MIN("allocaterow", ALR); 403 DBG_SET_FLAG_MIN("allocate", ALL); 404 DBG_SET_FLAG_MIN("attach", ATT); 405 DBG_SET_FLAG_MIN("bind", BND); 406 DBG_SET_FLAG_MIN("config", CFG); 407 DBG_SET_FLAG_MIN("exec", EXC); 408 DBG_SET_FLAG_MIN("exit", EXT); 409 DBG_SET_FLAG_MIN("find", FND); 410 DBG_SET_FLAG_MIN("flush", FLS); 411 DBG_SET_FLAG_MIN("fork", FRK); 412 DBG_SET_FLAG_MIN("getbuf", GTB); 413 DBG_SET_FLAG_MIN("hook", PMH); 414 DBG_SET_FLAG_MIN("init", INI); 415 DBG_SET_FLAG_MIN("intr", INT); 416 DBG_SET_FLAG_MIN("linktarget", TLK); 417 DBG_SET_FLAG_MIN("mayberemove", OMR); 418 DBG_SET_FLAG_MIN("ops", OPS); 419 DBG_SET_FLAG_MIN("read", REA); 420 DBG_SET_FLAG_MIN("register", REG); 421 DBG_SET_FLAG_MIN("release", REL); 422 DBG_SET_FLAG_MIN("remove", ORM); 423 DBG_SET_FLAG_MIN("sample", SAM); 424 DBG_SET_FLAG_MIN("scheduleio", SIO); 425 DBG_SET_FLAG_MIN("select", SEL); 426 DBG_SET_FLAG_MIN("signal", SIG); 427 DBG_SET_FLAG_MIN("swi", SWI); 428 DBG_SET_FLAG_MIN("swo", SWO); 429 DBG_SET_FLAG_MIN("start", STA); 430 DBG_SET_FLAG_MIN("stop", STO); 431 DBG_SET_FLAG_MIN("syscall", PMS); 432 DBG_SET_FLAG_MIN("unlinktarget", TUL); 433 DBG_SET_FLAG_MIN("write", WRI); 434 if (found == 0) { 435 /* unrecognized flag name */ 436 error = EINVAL; 437 goto done; 438 } 439 440 if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */ 441 *newbits = tmp; 442 continue; 443 } 444 445 p++; 446 goto newflag; 447 } 448 449 /* save the new flag set */ 450 bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags)); 451 452 done: 453 FREE(tmpflags, M_PMC); 454 return error; 455 } 456 457 static int 458 pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS) 459 { 460 char *fence, *newstr; 461 int error; 462 unsigned int n; 463 464 (void) arg1; (void) arg2; /* unused parameters */ 465 466 n = sizeof(pmc_debugstr); 467 MALLOC(newstr, char *, n, M_PMC, M_ZERO|M_WAITOK); 468 (void) strlcpy(newstr, pmc_debugstr, n); 469 470 error = sysctl_handle_string(oidp, newstr, n, req); 471 472 /* if there is a new string, parse and copy it */ 473 if (error == 0 && req->newptr != NULL) { 474 fence = newstr + (n < req->newlen ? n : req->newlen + 1); 475 if ((error = pmc_debugflags_parse(newstr, fence)) == 0) 476 (void) strlcpy(pmc_debugstr, newstr, 477 sizeof(pmc_debugstr)); 478 } 479 480 FREE(newstr, M_PMC); 481 482 return error; 483 } 484 #endif 485 486 /* 487 * Concurrency Control 488 * 489 * The driver manages the following data structures: 490 * 491 * - target process descriptors, one per target process 492 * - owner process descriptors (and attached lists), one per owner process 493 * - lookup hash tables for owner and target processes 494 * - PMC descriptors (and attached lists) 495 * - per-cpu hardware state 496 * - the 'hook' variable through which the kernel calls into 497 * this module 498 * - the machine hardware state (managed by the MD layer) 499 * 500 * These data structures are accessed from: 501 * 502 * - thread context-switch code 503 * - interrupt handlers (possibly on multiple cpus) 504 * - kernel threads on multiple cpus running on behalf of user 505 * processes doing system calls 506 * - this driver's private kernel threads 507 * 508 * = Locks and Locking strategy = 509 * 510 * The driver uses four locking strategies for its operation: 511 * 512 * - The global SX lock "pmc_sx" is used to protect internal 513 * data structures. 514 * 515 * Calls into the module by syscall() start with this lock being 516 * held in exclusive mode. Depending on the requested operation, 517 * the lock may be downgraded to 'shared' mode to allow more 518 * concurrent readers into the module. Calls into the module from 519 * other parts of the kernel acquire the lock in shared mode. 520 * 521 * This SX lock is held in exclusive mode for any operations that 522 * modify the linkages between the driver's internal data structures. 523 * 524 * The 'pmc_hook' function pointer is also protected by this lock. 525 * It is only examined with the sx lock held in exclusive mode. The 526 * kernel module is allowed to be unloaded only with the sx lock held 527 * in exclusive mode. In normal syscall handling, after acquiring the 528 * pmc_sx lock we first check that 'pmc_hook' is non-null before 529 * proceeding. This prevents races between the thread unloading the module 530 * and other threads seeking to use the module. 531 * 532 * - Lookups of target process structures and owner process structures 533 * cannot use the global "pmc_sx" SX lock because these lookups need 534 * to happen during context switches and in other critical sections 535 * where sleeping is not allowed. We protect these lookup tables 536 * with their own private spin-mutexes, "pmc_processhash_mtx" and 537 * "pmc_ownerhash_mtx". 538 * 539 * - Interrupt handlers work in a lock free manner. At interrupt 540 * time, handlers look at the PMC pointer (phw->phw_pmc) configured 541 * when the PMC was started. If this pointer is NULL, the interrupt 542 * is ignored after updating driver statistics. We ensure that this 543 * pointer is set (using an atomic operation if necessary) before the 544 * PMC hardware is started. Conversely, this pointer is unset atomically 545 * only after the PMC hardware is stopped. 546 * 547 * We ensure that everything needed for the operation of an 548 * interrupt handler is available without it needing to acquire any 549 * locks. We also ensure that a PMC's software state is destroyed only 550 * after the PMC is taken off hardware (on all CPUs). 551 * 552 * - Context-switch handling with process-private PMCs needs more 553 * care. 554 * 555 * A given process may be the target of multiple PMCs. For example, 556 * PMCATTACH and PMCDETACH may be requested by a process on one CPU 557 * while the target process is running on another. A PMC could also 558 * be getting released because its owner is exiting. We tackle 559 * these situations in the following manner: 560 * 561 * - each target process structure 'pmc_process' has an array 562 * of 'struct pmc *' pointers, one for each hardware PMC. 563 * 564 * - At context switch IN time, each "target" PMC in RUNNING state 565 * gets started on hardware and a pointer to each PMC is copied into 566 * the per-cpu phw array. The 'runcount' for the PMC is 567 * incremented. 568 * 569 * - At context switch OUT time, all process-virtual PMCs are stopped 570 * on hardware. The saved value is added to the PMCs value field 571 * only if the PMC is in a non-deleted state (the PMCs state could 572 * have changed during the current time slice). 573 * 574 * Note that since in-between a switch IN on a processor and a switch 575 * OUT, the PMC could have been released on another CPU. Therefore 576 * context switch OUT always looks at the hardware state to turn 577 * OFF PMCs and will update a PMC's saved value only if reachable 578 * from the target process record. 579 * 580 * - OP PMCRELEASE could be called on a PMC at any time (the PMC could 581 * be attached to many processes at the time of the call and could 582 * be active on multiple CPUs). 583 * 584 * We prevent further scheduling of the PMC by marking it as in 585 * state 'DELETED'. If the runcount of the PMC is non-zero then 586 * this PMC is currently running on a CPU somewhere. The thread 587 * doing the PMCRELEASE operation waits by repeatedly doing a 588 * pause() till the runcount comes to zero. 589 * 590 * The contents of a PMC descriptor (struct pmc) are protected using 591 * a spin-mutex. In order to save space, we use a mutex pool. 592 * 593 * In terms of lock types used by witness(4), we use: 594 * - Type "pmc-sx", used by the global SX lock. 595 * - Type "pmc-sleep", for sleep mutexes used by logger threads. 596 * - Type "pmc-per-proc", for protecting PMC owner descriptors. 597 * - Type "pmc-leaf", used for all other spin mutexes. 598 */ 599 600 /* 601 * save the cpu binding of the current kthread 602 */ 603 604 static void 605 pmc_save_cpu_binding(struct pmc_binding *pb) 606 { 607 PMCDBG(CPU,BND,2, "%s", "save-cpu"); 608 thread_lock(curthread); 609 pb->pb_bound = sched_is_bound(curthread); 610 pb->pb_cpu = curthread->td_oncpu; 611 thread_unlock(curthread); 612 PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); 613 } 614 615 /* 616 * restore the cpu binding of the current thread 617 */ 618 619 static void 620 pmc_restore_cpu_binding(struct pmc_binding *pb) 621 { 622 PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", 623 curthread->td_oncpu, pb->pb_cpu); 624 thread_lock(curthread); 625 if (pb->pb_bound) 626 sched_bind(curthread, pb->pb_cpu); 627 else 628 sched_unbind(curthread); 629 thread_unlock(curthread); 630 PMCDBG(CPU,BND,2, "%s", "restore-cpu done"); 631 } 632 633 /* 634 * move execution over the specified cpu and bind it there. 635 */ 636 637 static void 638 pmc_select_cpu(int cpu) 639 { 640 KASSERT(cpu >= 0 && cpu < mp_ncpus, 641 ("[pmc,%d] bad cpu number %d", __LINE__, cpu)); 642 643 /* never move to a disabled CPU */ 644 KASSERT(pmc_cpu_is_disabled(cpu) == 0, ("[pmc,%d] selecting " 645 "disabled CPU %d", __LINE__, cpu)); 646 647 PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu); 648 thread_lock(curthread); 649 sched_bind(curthread, cpu); 650 thread_unlock(curthread); 651 652 KASSERT(curthread->td_oncpu == cpu, 653 ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, 654 cpu, curthread->td_oncpu)); 655 656 PMCDBG(CPU,SEL,2, "select-cpu cpu=%d ok", cpu); 657 } 658 659 /* 660 * Force a context switch. 661 * 662 * We do this by pause'ing for 1 tick -- invoking mi_switch() is not 663 * guaranteed to force a context switch. 664 */ 665 666 static void 667 pmc_force_context_switch(void) 668 { 669 670 pause("pmcctx", 1); 671 } 672 673 /* 674 * Get the file name for an executable. This is a simple wrapper 675 * around vn_fullpath(9). 676 */ 677 678 static void 679 pmc_getfilename(struct vnode *v, char **fullpath, char **freepath) 680 { 681 struct thread *td; 682 683 td = curthread; 684 *fullpath = "unknown"; 685 *freepath = NULL; 686 vn_lock(v, LK_CANRECURSE | LK_EXCLUSIVE | LK_RETRY, td); 687 vn_fullpath(td, v, fullpath, freepath); 688 VOP_UNLOCK(v, 0, td); 689 } 690 691 /* 692 * remove an process owning PMCs 693 */ 694 695 void 696 pmc_remove_owner(struct pmc_owner *po) 697 { 698 struct pmc *pm, *tmp; 699 700 sx_assert(&pmc_sx, SX_XLOCKED); 701 702 PMCDBG(OWN,ORM,1, "remove-owner po=%p", po); 703 704 /* Remove descriptor from the owner hash table */ 705 LIST_REMOVE(po, po_next); 706 707 /* release all owned PMC descriptors */ 708 LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) { 709 PMCDBG(OWN,ORM,2, "pmc=%p", pm); 710 KASSERT(pm->pm_owner == po, 711 ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po)); 712 713 pmc_release_pmc_descriptor(pm); /* will unlink from the list */ 714 } 715 716 KASSERT(po->po_sscount == 0, 717 ("[pmc,%d] SS count not zero", __LINE__)); 718 KASSERT(LIST_EMPTY(&po->po_pmcs), 719 ("[pmc,%d] PMC list not empty", __LINE__)); 720 721 /* de-configure the log file if present */ 722 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 723 pmclog_deconfigure_log(po); 724 } 725 726 /* 727 * remove an owner process record if all conditions are met. 728 */ 729 730 static void 731 pmc_maybe_remove_owner(struct pmc_owner *po) 732 { 733 734 PMCDBG(OWN,OMR,1, "maybe-remove-owner po=%p", po); 735 736 /* 737 * Remove owner record if 738 * - this process does not own any PMCs 739 * - this process has not allocated a system-wide sampling buffer 740 */ 741 742 if (LIST_EMPTY(&po->po_pmcs) && 743 ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) { 744 pmc_remove_owner(po); 745 pmc_destroy_owner_descriptor(po); 746 } 747 } 748 749 /* 750 * Add an association between a target process and a PMC. 751 */ 752 753 static void 754 pmc_link_target_process(struct pmc *pm, struct pmc_process *pp) 755 { 756 int ri; 757 struct pmc_target *pt; 758 759 sx_assert(&pmc_sx, SX_XLOCKED); 760 761 KASSERT(pm != NULL && pp != NULL, 762 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); 763 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), 764 ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d", 765 __LINE__, pm, pp->pp_proc->p_pid)); 766 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < ((int) md->pmd_npmc - 1), 767 ("[pmc,%d] Illegal reference count %d for process record %p", 768 __LINE__, pp->pp_refcnt, (void *) pp)); 769 770 ri = PMC_TO_ROWINDEX(pm); 771 772 PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p", 773 pm, ri, pp); 774 775 #ifdef DEBUG 776 LIST_FOREACH(pt, &pm->pm_targets, pt_next) 777 if (pt->pt_process == pp) 778 KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets", 779 __LINE__, pp, pm)); 780 #endif 781 782 MALLOC(pt, struct pmc_target *, sizeof(struct pmc_target), 783 M_PMC, M_ZERO|M_WAITOK); 784 785 pt->pt_process = pp; 786 787 LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next); 788 789 atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc, 790 (uintptr_t)pm); 791 792 if (pm->pm_owner->po_owner == pp->pp_proc) 793 pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER; 794 795 /* 796 * Initialize the per-process values at this row index. 797 */ 798 pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ? 799 pm->pm_sc.pm_reloadcount : 0; 800 801 pp->pp_refcnt++; 802 803 } 804 805 /* 806 * Removes the association between a target process and a PMC. 807 */ 808 809 static void 810 pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp) 811 { 812 int ri; 813 struct proc *p; 814 struct pmc_target *ptgt; 815 816 sx_assert(&pmc_sx, SX_XLOCKED); 817 818 KASSERT(pm != NULL && pp != NULL, 819 ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp)); 820 821 KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt < (int) md->pmd_npmc, 822 ("[pmc,%d] Illegal ref count %d on process record %p", 823 __LINE__, pp->pp_refcnt, (void *) pp)); 824 825 ri = PMC_TO_ROWINDEX(pm); 826 827 PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p", 828 pm, ri, pp); 829 830 KASSERT(pp->pp_pmcs[ri].pp_pmc == pm, 831 ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__, 832 ri, pm, pp->pp_pmcs[ri].pp_pmc)); 833 834 pp->pp_pmcs[ri].pp_pmc = NULL; 835 pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0; 836 837 /* Remove owner-specific flags */ 838 if (pm->pm_owner->po_owner == pp->pp_proc) { 839 pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS; 840 pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER; 841 } 842 843 pp->pp_refcnt--; 844 845 /* Remove the target process from the PMC structure */ 846 LIST_FOREACH(ptgt, &pm->pm_targets, pt_next) 847 if (ptgt->pt_process == pp) 848 break; 849 850 KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found " 851 "in pmc %p", __LINE__, pp->pp_proc, pp, pm)); 852 853 LIST_REMOVE(ptgt, pt_next); 854 FREE(ptgt, M_PMC); 855 856 /* if the PMC now lacks targets, send the owner a SIGIO */ 857 if (LIST_EMPTY(&pm->pm_targets)) { 858 p = pm->pm_owner->po_owner; 859 PROC_LOCK(p); 860 psignal(p, SIGIO); 861 PROC_UNLOCK(p); 862 863 PMCDBG(PRC,SIG,2, "signalling proc=%p signal=%d", p, 864 SIGIO); 865 } 866 } 867 868 /* 869 * Check if PMC 'pm' may be attached to target process 't'. 870 */ 871 872 static int 873 pmc_can_attach(struct pmc *pm, struct proc *t) 874 { 875 struct proc *o; /* pmc owner */ 876 struct ucred *oc, *tc; /* owner, target credentials */ 877 int decline_attach, i; 878 879 /* 880 * A PMC's owner can always attach that PMC to itself. 881 */ 882 883 if ((o = pm->pm_owner->po_owner) == t) 884 return 0; 885 886 PROC_LOCK(o); 887 oc = o->p_ucred; 888 crhold(oc); 889 PROC_UNLOCK(o); 890 891 PROC_LOCK(t); 892 tc = t->p_ucred; 893 crhold(tc); 894 PROC_UNLOCK(t); 895 896 /* 897 * The effective uid of the PMC owner should match at least one 898 * of the {effective,real,saved} uids of the target process. 899 */ 900 901 decline_attach = oc->cr_uid != tc->cr_uid && 902 oc->cr_uid != tc->cr_svuid && 903 oc->cr_uid != tc->cr_ruid; 904 905 /* 906 * Every one of the target's group ids, must be in the owner's 907 * group list. 908 */ 909 for (i = 0; !decline_attach && i < tc->cr_ngroups; i++) 910 decline_attach = !groupmember(tc->cr_groups[i], oc); 911 912 /* check the read and saved gids too */ 913 if (decline_attach == 0) 914 decline_attach = !groupmember(tc->cr_rgid, oc) || 915 !groupmember(tc->cr_svgid, oc); 916 917 crfree(tc); 918 crfree(oc); 919 920 return !decline_attach; 921 } 922 923 /* 924 * Attach a process to a PMC. 925 */ 926 927 static int 928 pmc_attach_one_process(struct proc *p, struct pmc *pm) 929 { 930 int ri; 931 char *fullpath, *freepath; 932 struct pmc_process *pp; 933 934 sx_assert(&pmc_sx, SX_XLOCKED); 935 936 PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm, 937 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); 938 939 /* 940 * Locate the process descriptor corresponding to process 'p', 941 * allocating space as needed. 942 * 943 * Verify that rowindex 'pm_rowindex' is free in the process 944 * descriptor. 945 * 946 * If not, allocate space for a descriptor and link the 947 * process descriptor and PMC. 948 */ 949 ri = PMC_TO_ROWINDEX(pm); 950 951 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL) 952 return ENOMEM; 953 954 if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */ 955 return EEXIST; 956 957 if (pp->pp_pmcs[ri].pp_pmc != NULL) 958 return EBUSY; 959 960 pmc_link_target_process(pm, pp); 961 962 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) && 963 (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0) 964 pm->pm_flags |= PMC_F_NEEDS_LOGFILE; 965 966 pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */ 967 968 /* issue an attach event to a configured log file */ 969 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) { 970 pmc_getfilename(p->p_textvp, &fullpath, &freepath); 971 pmclog_process_pmcattach(pm, p->p_pid, fullpath); 972 if (freepath) 973 FREE(freepath, M_TEMP); 974 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 975 pmc_log_process_mappings(pm->pm_owner, p); 976 } 977 /* mark process as using HWPMCs */ 978 PROC_LOCK(p); 979 p->p_flag |= P_HWPMC; 980 PROC_UNLOCK(p); 981 982 return 0; 983 } 984 985 /* 986 * Attach a process and optionally its children 987 */ 988 989 static int 990 pmc_attach_process(struct proc *p, struct pmc *pm) 991 { 992 int error; 993 struct proc *top; 994 995 sx_assert(&pmc_sx, SX_XLOCKED); 996 997 PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm, 998 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); 999 1000 1001 /* 1002 * If this PMC successfully allowed a GETMSR operation 1003 * in the past, disallow further ATTACHes. 1004 */ 1005 1006 if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0) 1007 return EPERM; 1008 1009 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) 1010 return pmc_attach_one_process(p, pm); 1011 1012 /* 1013 * Traverse all child processes, attaching them to 1014 * this PMC. 1015 */ 1016 1017 sx_slock(&proctree_lock); 1018 1019 top = p; 1020 1021 for (;;) { 1022 if ((error = pmc_attach_one_process(p, pm)) != 0) 1023 break; 1024 if (!LIST_EMPTY(&p->p_children)) 1025 p = LIST_FIRST(&p->p_children); 1026 else for (;;) { 1027 if (p == top) 1028 goto done; 1029 if (LIST_NEXT(p, p_sibling)) { 1030 p = LIST_NEXT(p, p_sibling); 1031 break; 1032 } 1033 p = p->p_pptr; 1034 } 1035 } 1036 1037 if (error) 1038 (void) pmc_detach_process(top, pm); 1039 1040 done: 1041 sx_sunlock(&proctree_lock); 1042 return error; 1043 } 1044 1045 /* 1046 * Detach a process from a PMC. If there are no other PMCs tracking 1047 * this process, remove the process structure from its hash table. If 1048 * 'flags' contains PMC_FLAG_REMOVE, then free the process structure. 1049 */ 1050 1051 static int 1052 pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags) 1053 { 1054 int ri; 1055 struct pmc_process *pp; 1056 1057 sx_assert(&pmc_sx, SX_XLOCKED); 1058 1059 KASSERT(pm != NULL, 1060 ("[pmc,%d] null pm pointer", __LINE__)); 1061 1062 ri = PMC_TO_ROWINDEX(pm); 1063 1064 PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x", 1065 pm, ri, p, p->p_pid, p->p_comm, flags); 1066 1067 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) 1068 return ESRCH; 1069 1070 if (pp->pp_pmcs[ri].pp_pmc != pm) 1071 return EINVAL; 1072 1073 pmc_unlink_target_process(pm, pp); 1074 1075 /* Issue a detach entry if a log file is configured */ 1076 if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) 1077 pmclog_process_pmcdetach(pm, p->p_pid); 1078 1079 /* 1080 * If there are no PMCs targetting this process, we remove its 1081 * descriptor from the target hash table and unset the P_HWPMC 1082 * flag in the struct proc. 1083 */ 1084 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < (int) md->pmd_npmc, 1085 ("[pmc,%d] Illegal refcnt %d for process struct %p", 1086 __LINE__, pp->pp_refcnt, pp)); 1087 1088 if (pp->pp_refcnt != 0) /* still a target of some PMC */ 1089 return 0; 1090 1091 pmc_remove_process_descriptor(pp); 1092 1093 if (flags & PMC_FLAG_REMOVE) 1094 FREE(pp, M_PMC); 1095 1096 PROC_LOCK(p); 1097 p->p_flag &= ~P_HWPMC; 1098 PROC_UNLOCK(p); 1099 1100 return 0; 1101 } 1102 1103 /* 1104 * Detach a process and optionally its descendants from a PMC. 1105 */ 1106 1107 static int 1108 pmc_detach_process(struct proc *p, struct pmc *pm) 1109 { 1110 struct proc *top; 1111 1112 sx_assert(&pmc_sx, SX_XLOCKED); 1113 1114 PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm, 1115 PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm); 1116 1117 if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0) 1118 return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); 1119 1120 /* 1121 * Traverse all children, detaching them from this PMC. We 1122 * ignore errors since we could be detaching a PMC from a 1123 * partially attached proc tree. 1124 */ 1125 1126 sx_slock(&proctree_lock); 1127 1128 top = p; 1129 1130 for (;;) { 1131 (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE); 1132 1133 if (!LIST_EMPTY(&p->p_children)) 1134 p = LIST_FIRST(&p->p_children); 1135 else for (;;) { 1136 if (p == top) 1137 goto done; 1138 if (LIST_NEXT(p, p_sibling)) { 1139 p = LIST_NEXT(p, p_sibling); 1140 break; 1141 } 1142 p = p->p_pptr; 1143 } 1144 } 1145 1146 done: 1147 sx_sunlock(&proctree_lock); 1148 1149 if (LIST_EMPTY(&pm->pm_targets)) 1150 pm->pm_flags &= ~PMC_F_ATTACH_DONE; 1151 1152 return 0; 1153 } 1154 1155 1156 /* 1157 * Thread context switch IN 1158 */ 1159 1160 static void 1161 pmc_process_csw_in(struct thread *td) 1162 { 1163 int cpu; 1164 unsigned int ri; 1165 struct pmc *pm; 1166 struct proc *p; 1167 struct pmc_cpu *pc; 1168 struct pmc_hw *phw; 1169 struct pmc_process *pp; 1170 pmc_value_t newvalue; 1171 1172 p = td->td_proc; 1173 1174 if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL) 1175 return; 1176 1177 KASSERT(pp->pp_proc == td->td_proc, 1178 ("[pmc,%d] not my thread state", __LINE__)); 1179 1180 critical_enter(); /* no preemption from this point */ 1181 1182 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ 1183 1184 PMCDBG(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, 1185 p->p_pid, p->p_comm, pp); 1186 1187 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1188 ("[pmc,%d] wierd CPU id %d", __LINE__, cpu)); 1189 1190 pc = pmc_pcpu[cpu]; 1191 1192 for (ri = 0; ri < md->pmd_npmc; ri++) { 1193 1194 if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL) 1195 continue; 1196 1197 KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)), 1198 ("[pmc,%d] Target PMC in non-virtual mode (%d)", 1199 __LINE__, PMC_TO_MODE(pm))); 1200 1201 KASSERT(PMC_TO_ROWINDEX(pm) == ri, 1202 ("[pmc,%d] Row index mismatch pmc %d != ri %d", 1203 __LINE__, PMC_TO_ROWINDEX(pm), ri)); 1204 1205 /* 1206 * Only PMCs that are marked as 'RUNNING' need 1207 * be placed on hardware. 1208 */ 1209 1210 if (pm->pm_state != PMC_STATE_RUNNING) 1211 continue; 1212 1213 /* increment PMC runcount */ 1214 atomic_add_rel_32(&pm->pm_runcount, 1); 1215 1216 /* configure the HWPMC we are going to use. */ 1217 md->pmd_config_pmc(cpu, ri, pm); 1218 1219 phw = pc->pc_hwpmcs[ri]; 1220 1221 KASSERT(phw != NULL, 1222 ("[pmc,%d] null hw pointer", __LINE__)); 1223 1224 KASSERT(phw->phw_pmc == pm, 1225 ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__, 1226 phw->phw_pmc, pm)); 1227 1228 /* 1229 * Write out saved value and start the PMC. 1230 * 1231 * Sampling PMCs use a per-process value, while 1232 * counting mode PMCs use a per-pmc value that is 1233 * inherited across descendants. 1234 */ 1235 if (PMC_TO_MODE(pm) == PMC_MODE_TS) { 1236 mtx_pool_lock_spin(pmc_mtxpool, pm); 1237 newvalue = PMC_PCPU_SAVED(cpu,ri) = 1238 pp->pp_pmcs[ri].pp_pmcval; 1239 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1240 } else { 1241 KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC, 1242 ("[pmc,%d] illegal mode=%d", __LINE__, 1243 PMC_TO_MODE(pm))); 1244 mtx_pool_lock_spin(pmc_mtxpool, pm); 1245 newvalue = PMC_PCPU_SAVED(cpu, ri) = 1246 pm->pm_gv.pm_savedvalue; 1247 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1248 } 1249 1250 PMCDBG(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue); 1251 1252 md->pmd_write_pmc(cpu, ri, newvalue); 1253 md->pmd_start_pmc(cpu, ri); 1254 } 1255 1256 /* 1257 * perform any other architecture/cpu dependent thread 1258 * switch-in actions. 1259 */ 1260 1261 (void) (*md->pmd_switch_in)(pc, pp); 1262 1263 critical_exit(); 1264 1265 } 1266 1267 /* 1268 * Thread context switch OUT. 1269 */ 1270 1271 static void 1272 pmc_process_csw_out(struct thread *td) 1273 { 1274 int cpu; 1275 enum pmc_mode mode; 1276 unsigned int ri; 1277 struct pmc *pm; 1278 struct proc *p; 1279 struct pmc_cpu *pc; 1280 struct pmc_process *pp; 1281 int64_t tmp; 1282 pmc_value_t newvalue; 1283 1284 /* 1285 * Locate our process descriptor; this may be NULL if 1286 * this process is exiting and we have already removed 1287 * the process from the target process table. 1288 * 1289 * Note that due to kernel preemption, multiple 1290 * context switches may happen while the process is 1291 * exiting. 1292 * 1293 * Note also that if the target process cannot be 1294 * found we still need to deconfigure any PMCs that 1295 * are currently running on hardware. 1296 */ 1297 1298 p = td->td_proc; 1299 pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE); 1300 1301 /* 1302 * save PMCs 1303 */ 1304 1305 critical_enter(); 1306 1307 cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */ 1308 1309 PMCDBG(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p, 1310 p->p_pid, p->p_comm, pp); 1311 1312 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1313 ("[pmc,%d wierd CPU id %d", __LINE__, cpu)); 1314 1315 pc = pmc_pcpu[cpu]; 1316 1317 /* 1318 * When a PMC gets unlinked from a target PMC, it will 1319 * be removed from the target's pp_pmc[] array. 1320 * 1321 * However, on a MP system, the target could have been 1322 * executing on another CPU at the time of the unlink. 1323 * So, at context switch OUT time, we need to look at 1324 * the hardware to determine if a PMC is scheduled on 1325 * it. 1326 */ 1327 1328 for (ri = 0; ri < md->pmd_npmc; ri++) { 1329 1330 pm = NULL; 1331 (void) (*md->pmd_get_config)(cpu, ri, &pm); 1332 1333 if (pm == NULL) /* nothing at this row index */ 1334 continue; 1335 1336 mode = PMC_TO_MODE(pm); 1337 if (!PMC_IS_VIRTUAL_MODE(mode)) 1338 continue; /* not a process virtual PMC */ 1339 1340 KASSERT(PMC_TO_ROWINDEX(pm) == ri, 1341 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", 1342 __LINE__, PMC_TO_ROWINDEX(pm), ri)); 1343 1344 /* Stop hardware if not already stopped */ 1345 if (pm->pm_stalled == 0) 1346 md->pmd_stop_pmc(cpu, ri); 1347 1348 /* reduce this PMC's runcount */ 1349 atomic_subtract_rel_32(&pm->pm_runcount, 1); 1350 1351 /* 1352 * If this PMC is associated with this process, 1353 * save the reading. 1354 */ 1355 1356 if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) { 1357 1358 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, 1359 ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__, 1360 pm, ri, pp->pp_pmcs[ri].pp_pmc)); 1361 1362 KASSERT(pp->pp_refcnt > 0, 1363 ("[pmc,%d] pp refcnt = %d", __LINE__, 1364 pp->pp_refcnt)); 1365 1366 md->pmd_read_pmc(cpu, ri, &newvalue); 1367 1368 tmp = newvalue - PMC_PCPU_SAVED(cpu,ri); 1369 1370 PMCDBG(CSW,SWI,1,"cpu=%d ri=%d tmp=%jd", cpu, ri, 1371 tmp); 1372 1373 if (mode == PMC_MODE_TS) { 1374 1375 /* 1376 * For sampling process-virtual PMCs, 1377 * we expect the count to be 1378 * decreasing as the 'value' 1379 * programmed into the PMC is the 1380 * number of events to be seen till 1381 * the next sampling interrupt. 1382 */ 1383 if (tmp < 0) 1384 tmp += pm->pm_sc.pm_reloadcount; 1385 mtx_pool_lock_spin(pmc_mtxpool, pm); 1386 pp->pp_pmcs[ri].pp_pmcval -= tmp; 1387 if ((int64_t) pp->pp_pmcs[ri].pp_pmcval < 0) 1388 pp->pp_pmcs[ri].pp_pmcval += 1389 pm->pm_sc.pm_reloadcount; 1390 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1391 1392 } else { 1393 1394 /* 1395 * For counting process-virtual PMCs, 1396 * we expect the count to be 1397 * increasing monotonically, modulo a 64 1398 * bit wraparound. 1399 */ 1400 KASSERT((int64_t) tmp >= 0, 1401 ("[pmc,%d] negative increment cpu=%d " 1402 "ri=%d newvalue=%jx saved=%jx " 1403 "incr=%jx", __LINE__, cpu, ri, 1404 newvalue, PMC_PCPU_SAVED(cpu,ri), tmp)); 1405 1406 mtx_pool_lock_spin(pmc_mtxpool, pm); 1407 pm->pm_gv.pm_savedvalue += tmp; 1408 pp->pp_pmcs[ri].pp_pmcval += tmp; 1409 mtx_pool_unlock_spin(pmc_mtxpool, pm); 1410 1411 if (pm->pm_flags & PMC_F_LOG_PROCCSW) 1412 pmclog_process_proccsw(pm, pp, tmp); 1413 } 1414 } 1415 1416 /* mark hardware as free */ 1417 md->pmd_config_pmc(cpu, ri, NULL); 1418 } 1419 1420 /* 1421 * perform any other architecture/cpu dependent thread 1422 * switch out functions. 1423 */ 1424 1425 (void) (*md->pmd_switch_out)(pc, pp); 1426 1427 critical_exit(); 1428 } 1429 1430 /* 1431 * Log a KLD operation. 1432 */ 1433 1434 static void 1435 pmc_process_kld_load(struct pmckern_map_in *pkm) 1436 { 1437 struct pmc_owner *po; 1438 1439 sx_assert(&pmc_sx, SX_LOCKED); 1440 1441 /* 1442 * Notify owners of system sampling PMCs about KLD operations. 1443 */ 1444 1445 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 1446 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 1447 pmclog_process_map_in(po, (pid_t) -1, pkm->pm_address, 1448 (char *) pkm->pm_file); 1449 1450 /* 1451 * TODO: Notify owners of (all) process-sampling PMCs too. 1452 */ 1453 1454 return; 1455 } 1456 1457 static void 1458 pmc_process_kld_unload(struct pmckern_map_out *pkm) 1459 { 1460 struct pmc_owner *po; 1461 1462 sx_assert(&pmc_sx, SX_LOCKED); 1463 1464 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 1465 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 1466 pmclog_process_map_out(po, (pid_t) -1, 1467 pkm->pm_address, pkm->pm_address + pkm->pm_size); 1468 1469 /* 1470 * TODO: Notify owners of process-sampling PMCs. 1471 */ 1472 } 1473 1474 /* 1475 * A mapping change for a process. 1476 */ 1477 1478 static void 1479 pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm) 1480 { 1481 int ri; 1482 pid_t pid; 1483 char *fullpath, *freepath; 1484 const struct pmc *pm; 1485 struct pmc_owner *po; 1486 const struct pmc_process *pp; 1487 1488 freepath = fullpath = NULL; 1489 pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath); 1490 1491 pid = td->td_proc->p_pid; 1492 1493 /* Inform owners of all system-wide sampling PMCs. */ 1494 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 1495 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 1496 pmclog_process_map_in(po, pid, pkm->pm_address, fullpath); 1497 1498 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) 1499 goto done; 1500 1501 /* 1502 * Inform sampling PMC owners tracking this process. 1503 */ 1504 for (ri = 0; ri < md->pmd_npmc; ri++) 1505 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && 1506 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 1507 pmclog_process_map_in(pm->pm_owner, 1508 pid, pkm->pm_address, fullpath); 1509 1510 done: 1511 if (freepath) 1512 FREE(freepath, M_TEMP); 1513 } 1514 1515 1516 /* 1517 * Log an munmap request. 1518 */ 1519 1520 static void 1521 pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) 1522 { 1523 int ri; 1524 pid_t pid; 1525 struct pmc_owner *po; 1526 const struct pmc *pm; 1527 const struct pmc_process *pp; 1528 1529 pid = td->td_proc->p_pid; 1530 1531 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 1532 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 1533 pmclog_process_map_out(po, pid, pkm->pm_address, 1534 pkm->pm_address + pkm->pm_size); 1535 1536 if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL) 1537 return; 1538 1539 for (ri = 0; ri < md->pmd_npmc; ri++) 1540 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL && 1541 PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 1542 pmclog_process_map_out(pm->pm_owner, pid, 1543 pkm->pm_address, pkm->pm_address + pkm->pm_size); 1544 } 1545 1546 /* 1547 * Log mapping information about the kernel. 1548 */ 1549 1550 static void 1551 pmc_log_kernel_mappings(struct pmc *pm) 1552 { 1553 struct pmc_owner *po; 1554 struct pmckern_map_in *km, *kmbase; 1555 1556 sx_assert(&pmc_sx, SX_LOCKED); 1557 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), 1558 ("[pmc,%d] non-sampling PMC (%p) desires mapping information", 1559 __LINE__, (void *) pm)); 1560 1561 po = pm->pm_owner; 1562 1563 if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) 1564 return; 1565 1566 /* 1567 * Log the current set of kernel modules. 1568 */ 1569 kmbase = linker_hwpmc_list_objects(); 1570 for (km = kmbase; km->pm_file != NULL; km++) { 1571 PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file, 1572 (void *) km->pm_address); 1573 pmclog_process_map_in(po, (pid_t) -1, km->pm_address, 1574 km->pm_file); 1575 } 1576 FREE(kmbase, M_LINKER); 1577 1578 po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; 1579 } 1580 1581 /* 1582 * Log the mappings for a single process. 1583 */ 1584 1585 static void 1586 pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) 1587 { 1588 } 1589 1590 /* 1591 * Log mappings for all processes in the system. 1592 */ 1593 1594 static void 1595 pmc_log_all_process_mappings(struct pmc_owner *po) 1596 { 1597 struct proc *p, *top; 1598 1599 sx_assert(&pmc_sx, SX_XLOCKED); 1600 1601 if ((p = pfind(1)) == NULL) 1602 panic("[pmc,%d] Cannot find init", __LINE__); 1603 1604 PROC_UNLOCK(p); 1605 1606 sx_slock(&proctree_lock); 1607 1608 top = p; 1609 1610 for (;;) { 1611 pmc_log_process_mappings(po, p); 1612 if (!LIST_EMPTY(&p->p_children)) 1613 p = LIST_FIRST(&p->p_children); 1614 else for (;;) { 1615 if (p == top) 1616 goto done; 1617 if (LIST_NEXT(p, p_sibling)) { 1618 p = LIST_NEXT(p, p_sibling); 1619 break; 1620 } 1621 p = p->p_pptr; 1622 } 1623 } 1624 done: 1625 sx_sunlock(&proctree_lock); 1626 } 1627 1628 /* 1629 * The 'hook' invoked from the kernel proper 1630 */ 1631 1632 1633 #ifdef DEBUG 1634 const char *pmc_hooknames[] = { 1635 /* these strings correspond to PMC_FN_* in <sys/pmckern.h> */ 1636 "", 1637 "EXEC", 1638 "CSW-IN", 1639 "CSW-OUT", 1640 "SAMPLE", 1641 "KLDLOAD", 1642 "KLDUNLOAD", 1643 "MMAP", 1644 "MUNMAP", 1645 "CALLCHAIN" 1646 }; 1647 #endif 1648 1649 static int 1650 pmc_hook_handler(struct thread *td, int function, void *arg) 1651 { 1652 1653 PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function, 1654 pmc_hooknames[function], arg); 1655 1656 switch (function) 1657 { 1658 1659 /* 1660 * Process exec() 1661 */ 1662 1663 case PMC_FN_PROCESS_EXEC: 1664 { 1665 char *fullpath, *freepath; 1666 unsigned int ri; 1667 int is_using_hwpmcs; 1668 struct pmc *pm; 1669 struct proc *p; 1670 struct pmc_owner *po; 1671 struct pmc_process *pp; 1672 struct pmckern_procexec *pk; 1673 1674 sx_assert(&pmc_sx, SX_XLOCKED); 1675 1676 p = td->td_proc; 1677 pmc_getfilename(p->p_textvp, &fullpath, &freepath); 1678 1679 pk = (struct pmckern_procexec *) arg; 1680 1681 /* Inform owners of SS mode PMCs of the exec event. */ 1682 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 1683 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 1684 pmclog_process_procexec(po, PMC_ID_INVALID, 1685 p->p_pid, pk->pm_entryaddr, fullpath); 1686 1687 PROC_LOCK(p); 1688 is_using_hwpmcs = p->p_flag & P_HWPMC; 1689 PROC_UNLOCK(p); 1690 1691 if (!is_using_hwpmcs) { 1692 if (freepath) 1693 FREE(freepath, M_TEMP); 1694 break; 1695 } 1696 1697 /* 1698 * PMCs are not inherited across an exec(): remove any 1699 * PMCs that this process is the owner of. 1700 */ 1701 1702 if ((po = pmc_find_owner_descriptor(p)) != NULL) { 1703 pmc_remove_owner(po); 1704 pmc_destroy_owner_descriptor(po); 1705 } 1706 1707 /* 1708 * If the process being exec'ed is not the target of any 1709 * PMC, we are done. 1710 */ 1711 if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) { 1712 if (freepath) 1713 FREE(freepath, M_TEMP); 1714 break; 1715 } 1716 1717 /* 1718 * Log the exec event to all monitoring owners. Skip 1719 * owners who have already recieved the event because 1720 * they had system sampling PMCs active. 1721 */ 1722 for (ri = 0; ri < md->pmd_npmc; ri++) 1723 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { 1724 po = pm->pm_owner; 1725 if (po->po_sscount == 0 && 1726 po->po_flags & PMC_PO_OWNS_LOGFILE) 1727 pmclog_process_procexec(po, pm->pm_id, 1728 p->p_pid, pk->pm_entryaddr, 1729 fullpath); 1730 } 1731 1732 if (freepath) 1733 FREE(freepath, M_TEMP); 1734 1735 1736 PMCDBG(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d", 1737 p, p->p_pid, p->p_comm, pk->pm_credentialschanged); 1738 1739 if (pk->pm_credentialschanged == 0) /* no change */ 1740 break; 1741 1742 /* 1743 * If the newly exec()'ed process has a different credential 1744 * than before, allow it to be the target of a PMC only if 1745 * the PMC's owner has sufficient priviledge. 1746 */ 1747 1748 for (ri = 0; ri < md->pmd_npmc; ri++) 1749 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) 1750 if (pmc_can_attach(pm, td->td_proc) != 0) 1751 pmc_detach_one_process(td->td_proc, 1752 pm, PMC_FLAG_NONE); 1753 1754 KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < (int) md->pmd_npmc, 1755 ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__, 1756 pp->pp_refcnt, pp)); 1757 1758 /* 1759 * If this process is no longer the target of any 1760 * PMCs, we can remove the process entry and free 1761 * up space. 1762 */ 1763 1764 if (pp->pp_refcnt == 0) { 1765 pmc_remove_process_descriptor(pp); 1766 FREE(pp, M_PMC); 1767 break; 1768 } 1769 1770 } 1771 break; 1772 1773 case PMC_FN_CSW_IN: 1774 pmc_process_csw_in(td); 1775 break; 1776 1777 case PMC_FN_CSW_OUT: 1778 pmc_process_csw_out(td); 1779 break; 1780 1781 /* 1782 * Process accumulated PC samples. 1783 * 1784 * This function is expected to be called by hardclock() for 1785 * each CPU that has accumulated PC samples. 1786 * 1787 * This function is to be executed on the CPU whose samples 1788 * are being processed. 1789 */ 1790 case PMC_FN_DO_SAMPLES: 1791 1792 /* 1793 * Clear the cpu specific bit in the CPU mask before 1794 * do the rest of the processing. If the NMI handler 1795 * gets invoked after the "atomic_clear_int()" call 1796 * below but before "pmc_process_samples()" gets 1797 * around to processing the interrupt, then we will 1798 * come back here at the next hardclock() tick (and 1799 * may find nothing to do if "pmc_process_samples()" 1800 * had already processed the interrupt). We don't 1801 * lose the interrupt sample. 1802 */ 1803 atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid))); 1804 pmc_process_samples(PCPU_GET(cpuid)); 1805 break; 1806 1807 1808 case PMC_FN_KLD_LOAD: 1809 sx_assert(&pmc_sx, SX_LOCKED); 1810 pmc_process_kld_load((struct pmckern_map_in *) arg); 1811 break; 1812 1813 case PMC_FN_KLD_UNLOAD: 1814 sx_assert(&pmc_sx, SX_LOCKED); 1815 pmc_process_kld_unload((struct pmckern_map_out *) arg); 1816 break; 1817 1818 case PMC_FN_MMAP: 1819 sx_assert(&pmc_sx, SX_LOCKED); 1820 pmc_process_mmap(td, (struct pmckern_map_in *) arg); 1821 break; 1822 1823 case PMC_FN_MUNMAP: 1824 sx_assert(&pmc_sx, SX_LOCKED); 1825 pmc_process_munmap(td, (struct pmckern_map_out *) arg); 1826 break; 1827 1828 case PMC_FN_USER_CALLCHAIN: 1829 /* 1830 * Record a call chain. 1831 */ 1832 pmc_capture_user_callchain(PCPU_GET(cpuid), 1833 (struct trapframe *) arg); 1834 break; 1835 1836 default: 1837 #ifdef DEBUG 1838 KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); 1839 #endif 1840 break; 1841 1842 } 1843 1844 return 0; 1845 } 1846 1847 /* 1848 * allocate a 'struct pmc_owner' descriptor in the owner hash table. 1849 */ 1850 1851 static struct pmc_owner * 1852 pmc_allocate_owner_descriptor(struct proc *p) 1853 { 1854 uint32_t hindex; 1855 struct pmc_owner *po; 1856 struct pmc_ownerhash *poh; 1857 1858 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); 1859 poh = &pmc_ownerhash[hindex]; 1860 1861 /* allocate space for N pointers and one descriptor struct */ 1862 MALLOC(po, struct pmc_owner *, sizeof(struct pmc_owner), 1863 M_PMC, M_ZERO|M_WAITOK); 1864 1865 po->po_sscount = po->po_error = po->po_flags = 0; 1866 po->po_file = NULL; 1867 po->po_owner = p; 1868 po->po_kthread = NULL; 1869 LIST_INIT(&po->po_pmcs); 1870 LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */ 1871 1872 TAILQ_INIT(&po->po_logbuffers); 1873 mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN); 1874 1875 PMCDBG(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p", 1876 p, p->p_pid, p->p_comm, po); 1877 1878 return po; 1879 } 1880 1881 static void 1882 pmc_destroy_owner_descriptor(struct pmc_owner *po) 1883 { 1884 1885 PMCDBG(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)", 1886 po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm); 1887 1888 mtx_destroy(&po->po_mtx); 1889 FREE(po, M_PMC); 1890 } 1891 1892 /* 1893 * find the descriptor corresponding to process 'p', adding or removing it 1894 * as specified by 'mode'. 1895 */ 1896 1897 static struct pmc_process * 1898 pmc_find_process_descriptor(struct proc *p, uint32_t mode) 1899 { 1900 uint32_t hindex; 1901 struct pmc_process *pp, *ppnew; 1902 struct pmc_processhash *pph; 1903 1904 hindex = PMC_HASH_PTR(p, pmc_processhashmask); 1905 pph = &pmc_processhash[hindex]; 1906 1907 ppnew = NULL; 1908 1909 /* 1910 * Pre-allocate memory in the FIND_ALLOCATE case since we 1911 * cannot call malloc(9) once we hold a spin lock. 1912 */ 1913 1914 if (mode & PMC_FLAG_ALLOCATE) { 1915 /* allocate additional space for 'n' pmc pointers */ 1916 MALLOC(ppnew, struct pmc_process *, 1917 sizeof(struct pmc_process) + md->pmd_npmc * 1918 sizeof(struct pmc_targetstate), M_PMC, M_ZERO|M_WAITOK); 1919 } 1920 1921 mtx_lock_spin(&pmc_processhash_mtx); 1922 LIST_FOREACH(pp, pph, pp_next) 1923 if (pp->pp_proc == p) 1924 break; 1925 1926 if ((mode & PMC_FLAG_REMOVE) && pp != NULL) 1927 LIST_REMOVE(pp, pp_next); 1928 1929 if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL && 1930 ppnew != NULL) { 1931 ppnew->pp_proc = p; 1932 LIST_INSERT_HEAD(pph, ppnew, pp_next); 1933 pp = ppnew; 1934 ppnew = NULL; 1935 } 1936 mtx_unlock_spin(&pmc_processhash_mtx); 1937 1938 if (pp != NULL && ppnew != NULL) 1939 FREE(ppnew, M_PMC); 1940 1941 return pp; 1942 } 1943 1944 /* 1945 * remove a process descriptor from the process hash table. 1946 */ 1947 1948 static void 1949 pmc_remove_process_descriptor(struct pmc_process *pp) 1950 { 1951 KASSERT(pp->pp_refcnt == 0, 1952 ("[pmc,%d] Removing process descriptor %p with count %d", 1953 __LINE__, pp, pp->pp_refcnt)); 1954 1955 mtx_lock_spin(&pmc_processhash_mtx); 1956 LIST_REMOVE(pp, pp_next); 1957 mtx_unlock_spin(&pmc_processhash_mtx); 1958 } 1959 1960 1961 /* 1962 * find an owner descriptor corresponding to proc 'p' 1963 */ 1964 1965 static struct pmc_owner * 1966 pmc_find_owner_descriptor(struct proc *p) 1967 { 1968 uint32_t hindex; 1969 struct pmc_owner *po; 1970 struct pmc_ownerhash *poh; 1971 1972 hindex = PMC_HASH_PTR(p, pmc_ownerhashmask); 1973 poh = &pmc_ownerhash[hindex]; 1974 1975 po = NULL; 1976 LIST_FOREACH(po, poh, po_next) 1977 if (po->po_owner == p) 1978 break; 1979 1980 PMCDBG(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> " 1981 "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po); 1982 1983 return po; 1984 } 1985 1986 /* 1987 * pmc_allocate_pmc_descriptor 1988 * 1989 * Allocate a pmc descriptor and initialize its 1990 * fields. 1991 */ 1992 1993 static struct pmc * 1994 pmc_allocate_pmc_descriptor(void) 1995 { 1996 struct pmc *pmc; 1997 1998 MALLOC(pmc, struct pmc *, sizeof(struct pmc), M_PMC, M_ZERO|M_WAITOK); 1999 2000 if (pmc != NULL) { 2001 pmc->pm_owner = NULL; 2002 LIST_INIT(&pmc->pm_targets); 2003 } 2004 2005 PMCDBG(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc); 2006 2007 return pmc; 2008 } 2009 2010 /* 2011 * Destroy a pmc descriptor. 2012 */ 2013 2014 static void 2015 pmc_destroy_pmc_descriptor(struct pmc *pm) 2016 { 2017 (void) pm; 2018 2019 #ifdef DEBUG 2020 KASSERT(pm->pm_state == PMC_STATE_DELETED || 2021 pm->pm_state == PMC_STATE_FREE, 2022 ("[pmc,%d] destroying non-deleted PMC", __LINE__)); 2023 KASSERT(LIST_EMPTY(&pm->pm_targets), 2024 ("[pmc,%d] destroying pmc with targets", __LINE__)); 2025 KASSERT(pm->pm_owner == NULL, 2026 ("[pmc,%d] destroying pmc attached to an owner", __LINE__)); 2027 KASSERT(pm->pm_runcount == 0, 2028 ("[pmc,%d] pmc has non-zero run count %d", __LINE__, 2029 pm->pm_runcount)); 2030 #endif 2031 } 2032 2033 static void 2034 pmc_wait_for_pmc_idle(struct pmc *pm) 2035 { 2036 #ifdef DEBUG 2037 volatile int maxloop; 2038 2039 maxloop = 100 * mp_ncpus; 2040 #endif 2041 2042 /* 2043 * Loop (with a forced context switch) till the PMC's runcount 2044 * comes down to zero. 2045 */ 2046 while (atomic_load_acq_32(&pm->pm_runcount) > 0) { 2047 #ifdef DEBUG 2048 maxloop--; 2049 KASSERT(maxloop > 0, 2050 ("[pmc,%d] (ri%d, rc%d) waiting too long for " 2051 "pmc to be free", __LINE__, 2052 PMC_TO_ROWINDEX(pm), pm->pm_runcount)); 2053 #endif 2054 pmc_force_context_switch(); 2055 } 2056 } 2057 2058 /* 2059 * This function does the following things: 2060 * 2061 * - detaches the PMC from hardware 2062 * - unlinks all target threads that were attached to it 2063 * - removes the PMC from its owner's list 2064 * - destroy's the PMC private mutex 2065 * 2066 * Once this function completes, the given pmc pointer can be safely 2067 * FREE'd by the caller. 2068 */ 2069 2070 static void 2071 pmc_release_pmc_descriptor(struct pmc *pm) 2072 { 2073 u_int ri, cpu; 2074 enum pmc_mode mode; 2075 struct pmc_hw *phw; 2076 struct pmc_owner *po; 2077 struct pmc_process *pp; 2078 struct pmc_target *ptgt, *tmp; 2079 struct pmc_binding pb; 2080 2081 sx_assert(&pmc_sx, SX_XLOCKED); 2082 2083 KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); 2084 2085 ri = PMC_TO_ROWINDEX(pm); 2086 mode = PMC_TO_MODE(pm); 2087 2088 PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, 2089 mode); 2090 2091 /* 2092 * First, we take the PMC off hardware. 2093 */ 2094 cpu = 0; 2095 if (PMC_IS_SYSTEM_MODE(mode)) { 2096 2097 /* 2098 * A system mode PMC runs on a specific CPU. Switch 2099 * to this CPU and turn hardware off. 2100 */ 2101 pmc_save_cpu_binding(&pb); 2102 2103 cpu = PMC_TO_CPU(pm); 2104 2105 pmc_select_cpu(cpu); 2106 2107 /* switch off non-stalled CPUs */ 2108 if (pm->pm_state == PMC_STATE_RUNNING && 2109 pm->pm_stalled == 0) { 2110 2111 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; 2112 2113 KASSERT(phw->phw_pmc == pm, 2114 ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", 2115 __LINE__, ri, phw->phw_pmc, pm)); 2116 PMCDBG(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); 2117 2118 critical_enter(); 2119 md->pmd_stop_pmc(cpu, ri); 2120 critical_exit(); 2121 } 2122 2123 PMCDBG(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); 2124 2125 critical_enter(); 2126 md->pmd_config_pmc(cpu, ri, NULL); 2127 critical_exit(); 2128 2129 /* adjust the global and process count of SS mode PMCs */ 2130 if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { 2131 po = pm->pm_owner; 2132 po->po_sscount--; 2133 if (po->po_sscount == 0) { 2134 atomic_subtract_rel_int(&pmc_ss_count, 1); 2135 LIST_REMOVE(po, po_ssnext); 2136 } 2137 } 2138 2139 pm->pm_state = PMC_STATE_DELETED; 2140 2141 pmc_restore_cpu_binding(&pb); 2142 2143 /* 2144 * We could have references to this PMC structure in 2145 * the per-cpu sample queues. Wait for the queue to 2146 * drain. 2147 */ 2148 pmc_wait_for_pmc_idle(pm); 2149 2150 } else if (PMC_IS_VIRTUAL_MODE(mode)) { 2151 2152 /* 2153 * A virtual PMC could be running on multiple CPUs at 2154 * a given instant. 2155 * 2156 * By marking its state as DELETED, we ensure that 2157 * this PMC is never further scheduled on hardware. 2158 * 2159 * Then we wait till all CPUs are done with this PMC. 2160 */ 2161 pm->pm_state = PMC_STATE_DELETED; 2162 2163 2164 /* Wait for the PMCs runcount to come to zero. */ 2165 pmc_wait_for_pmc_idle(pm); 2166 2167 /* 2168 * At this point the PMC is off all CPUs and cannot be 2169 * freshly scheduled onto a CPU. It is now safe to 2170 * unlink all targets from this PMC. If a 2171 * process-record's refcount falls to zero, we remove 2172 * it from the hash table. The module-wide SX lock 2173 * protects us from races. 2174 */ 2175 LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { 2176 pp = ptgt->pt_process; 2177 pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ 2178 2179 PMCDBG(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); 2180 2181 /* 2182 * If the target process record shows that no 2183 * PMCs are attached to it, reclaim its space. 2184 */ 2185 2186 if (pp->pp_refcnt == 0) { 2187 pmc_remove_process_descriptor(pp); 2188 FREE(pp, M_PMC); 2189 } 2190 } 2191 2192 cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ 2193 2194 } 2195 2196 /* 2197 * Release any MD resources 2198 */ 2199 2200 (void) md->pmd_release_pmc(cpu, ri, pm); 2201 2202 /* 2203 * Update row disposition 2204 */ 2205 2206 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) 2207 PMC_UNMARK_ROW_STANDALONE(ri); 2208 else 2209 PMC_UNMARK_ROW_THREAD(ri); 2210 2211 /* unlink from the owner's list */ 2212 if (pm->pm_owner) { 2213 LIST_REMOVE(pm, pm_next); 2214 pm->pm_owner = NULL; 2215 } 2216 2217 pmc_destroy_pmc_descriptor(pm); 2218 } 2219 2220 /* 2221 * Register an owner and a pmc. 2222 */ 2223 2224 static int 2225 pmc_register_owner(struct proc *p, struct pmc *pmc) 2226 { 2227 struct pmc_owner *po; 2228 2229 sx_assert(&pmc_sx, SX_XLOCKED); 2230 2231 if ((po = pmc_find_owner_descriptor(p)) == NULL) 2232 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) 2233 return ENOMEM; 2234 2235 KASSERT(pmc->pm_owner == NULL, 2236 ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); 2237 pmc->pm_owner = po; 2238 2239 LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); 2240 2241 PROC_LOCK(p); 2242 p->p_flag |= P_HWPMC; 2243 PROC_UNLOCK(p); 2244 2245 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 2246 pmclog_process_pmcallocate(pmc); 2247 2248 PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", 2249 po, pmc); 2250 2251 return 0; 2252 } 2253 2254 /* 2255 * Return the current row disposition: 2256 * == 0 => FREE 2257 * > 0 => PROCESS MODE 2258 * < 0 => SYSTEM MODE 2259 */ 2260 2261 int 2262 pmc_getrowdisp(int ri) 2263 { 2264 return pmc_pmcdisp[ri]; 2265 } 2266 2267 /* 2268 * Check if a PMC at row index 'ri' can be allocated to the current 2269 * process. 2270 * 2271 * Allocation can fail if: 2272 * - the current process is already being profiled by a PMC at index 'ri', 2273 * attached to it via OP_PMCATTACH. 2274 * - the current process has already allocated a PMC at index 'ri' 2275 * via OP_ALLOCATE. 2276 */ 2277 2278 static int 2279 pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) 2280 { 2281 enum pmc_mode mode; 2282 struct pmc *pm; 2283 struct pmc_owner *po; 2284 struct pmc_process *pp; 2285 2286 PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " 2287 "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); 2288 2289 /* 2290 * We shouldn't have already allocated a process-mode PMC at 2291 * row index 'ri'. 2292 * 2293 * We shouldn't have allocated a system-wide PMC on the same 2294 * CPU and same RI. 2295 */ 2296 if ((po = pmc_find_owner_descriptor(p)) != NULL) 2297 LIST_FOREACH(pm, &po->po_pmcs, pm_next) { 2298 if (PMC_TO_ROWINDEX(pm) == ri) { 2299 mode = PMC_TO_MODE(pm); 2300 if (PMC_IS_VIRTUAL_MODE(mode)) 2301 return EEXIST; 2302 if (PMC_IS_SYSTEM_MODE(mode) && 2303 (int) PMC_TO_CPU(pm) == cpu) 2304 return EEXIST; 2305 } 2306 } 2307 2308 /* 2309 * We also shouldn't be the target of any PMC at this index 2310 * since otherwise a PMC_ATTACH to ourselves will fail. 2311 */ 2312 if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) 2313 if (pp->pp_pmcs[ri].pp_pmc) 2314 return EEXIST; 2315 2316 PMCDBG(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", 2317 p, p->p_pid, p->p_comm, ri); 2318 2319 return 0; 2320 } 2321 2322 /* 2323 * Check if a given PMC at row index 'ri' can be currently used in 2324 * mode 'mode'. 2325 */ 2326 2327 static int 2328 pmc_can_allocate_row(int ri, enum pmc_mode mode) 2329 { 2330 enum pmc_disp disp; 2331 2332 sx_assert(&pmc_sx, SX_XLOCKED); 2333 2334 PMCDBG(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); 2335 2336 if (PMC_IS_SYSTEM_MODE(mode)) 2337 disp = PMC_DISP_STANDALONE; 2338 else 2339 disp = PMC_DISP_THREAD; 2340 2341 /* 2342 * check disposition for PMC row 'ri': 2343 * 2344 * Expected disposition Row-disposition Result 2345 * 2346 * STANDALONE STANDALONE or FREE proceed 2347 * STANDALONE THREAD fail 2348 * THREAD THREAD or FREE proceed 2349 * THREAD STANDALONE fail 2350 */ 2351 2352 if (!PMC_ROW_DISP_IS_FREE(ri) && 2353 !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && 2354 !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) 2355 return EBUSY; 2356 2357 /* 2358 * All OK 2359 */ 2360 2361 PMCDBG(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); 2362 2363 return 0; 2364 2365 } 2366 2367 /* 2368 * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. 2369 */ 2370 2371 static struct pmc * 2372 pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) 2373 { 2374 struct pmc *pm; 2375 2376 KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, 2377 ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, 2378 PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); 2379 2380 LIST_FOREACH(pm, &po->po_pmcs, pm_next) 2381 if (pm->pm_id == pmcid) 2382 return pm; 2383 2384 return NULL; 2385 } 2386 2387 static int 2388 pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) 2389 { 2390 2391 struct pmc *pm; 2392 struct pmc_owner *po; 2393 2394 PMCDBG(PMC,FND,1, "find-pmc id=%d", pmcid); 2395 2396 if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) 2397 return ESRCH; 2398 2399 if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) 2400 return EINVAL; 2401 2402 PMCDBG(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); 2403 2404 *pmc = pm; 2405 return 0; 2406 } 2407 2408 /* 2409 * Start a PMC. 2410 */ 2411 2412 static int 2413 pmc_start(struct pmc *pm) 2414 { 2415 int error, cpu, ri; 2416 enum pmc_mode mode; 2417 struct pmc_owner *po; 2418 struct pmc_binding pb; 2419 2420 KASSERT(pm != NULL, 2421 ("[pmc,%d] null pm", __LINE__)); 2422 2423 mode = PMC_TO_MODE(pm); 2424 ri = PMC_TO_ROWINDEX(pm); 2425 error = 0; 2426 2427 PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); 2428 2429 po = pm->pm_owner; 2430 2431 /* 2432 * Disallow PMCSTART if a logfile is required but has not been 2433 * configured yet. 2434 */ 2435 if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && 2436 (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) 2437 return EDOOFUS; /* programming error */ 2438 2439 /* 2440 * If this is a sampling mode PMC, log mapping information for 2441 * the kernel modules that are currently loaded. 2442 */ 2443 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 2444 pmc_log_kernel_mappings(pm); 2445 2446 if (PMC_IS_VIRTUAL_MODE(mode)) { 2447 2448 /* 2449 * If a PMCATTACH has never been done on this PMC, 2450 * attach it to its owner process. 2451 */ 2452 2453 if (LIST_EMPTY(&pm->pm_targets)) 2454 error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : 2455 pmc_attach_process(po->po_owner, pm); 2456 2457 /* 2458 * If the PMC is attached to its owner, then force a context 2459 * switch to ensure that the MD state gets set correctly. 2460 */ 2461 2462 if (error == 0) { 2463 pm->pm_state = PMC_STATE_RUNNING; 2464 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) 2465 pmc_force_context_switch(); 2466 } 2467 2468 return error; 2469 } 2470 2471 2472 /* 2473 * A system-wide PMC. 2474 * 2475 * Add the owner to the global list if this is a system-wide 2476 * sampling PMC. 2477 */ 2478 2479 if (mode == PMC_MODE_SS) { 2480 if (po->po_sscount == 0) { 2481 LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext); 2482 atomic_add_rel_int(&pmc_ss_count, 1); 2483 PMCDBG(PMC,OPS,1, "po=%p in global list", po); 2484 } 2485 po->po_sscount++; 2486 } 2487 2488 /* Log mapping information for all processes in the system. */ 2489 pmc_log_all_process_mappings(po); 2490 2491 /* 2492 * Move to the CPU associated with this 2493 * PMC, and start the hardware. 2494 */ 2495 2496 pmc_save_cpu_binding(&pb); 2497 2498 cpu = PMC_TO_CPU(pm); 2499 2500 if (pmc_cpu_is_disabled(cpu)) 2501 return ENXIO; 2502 2503 pmc_select_cpu(cpu); 2504 2505 /* 2506 * global PMCs are configured at allocation time 2507 * so write out the initial value and start the PMC. 2508 */ 2509 2510 pm->pm_state = PMC_STATE_RUNNING; 2511 2512 critical_enter(); 2513 if ((error = md->pmd_write_pmc(cpu, ri, 2514 PMC_IS_SAMPLING_MODE(mode) ? 2515 pm->pm_sc.pm_reloadcount : 2516 pm->pm_sc.pm_initial)) == 0) 2517 error = md->pmd_start_pmc(cpu, ri); 2518 critical_exit(); 2519 2520 pmc_restore_cpu_binding(&pb); 2521 2522 return error; 2523 } 2524 2525 /* 2526 * Stop a PMC. 2527 */ 2528 2529 static int 2530 pmc_stop(struct pmc *pm) 2531 { 2532 int cpu, error, ri; 2533 struct pmc_owner *po; 2534 struct pmc_binding pb; 2535 2536 KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); 2537 2538 PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, 2539 PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); 2540 2541 pm->pm_state = PMC_STATE_STOPPED; 2542 2543 /* 2544 * If the PMC is a virtual mode one, changing the state to 2545 * non-RUNNING is enough to ensure that the PMC never gets 2546 * scheduled. 2547 * 2548 * If this PMC is current running on a CPU, then it will 2549 * handled correctly at the time its target process is context 2550 * switched out. 2551 */ 2552 2553 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) 2554 return 0; 2555 2556 /* 2557 * A system-mode PMC. Move to the CPU associated with 2558 * this PMC, and stop the hardware. We update the 2559 * 'initial count' so that a subsequent PMCSTART will 2560 * resume counting from the current hardware count. 2561 */ 2562 2563 pmc_save_cpu_binding(&pb); 2564 2565 cpu = PMC_TO_CPU(pm); 2566 2567 KASSERT(cpu >= 0 && cpu < mp_ncpus, 2568 ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); 2569 2570 if (pmc_cpu_is_disabled(cpu)) 2571 return ENXIO; 2572 2573 pmc_select_cpu(cpu); 2574 2575 ri = PMC_TO_ROWINDEX(pm); 2576 2577 critical_enter(); 2578 if ((error = md->pmd_stop_pmc(cpu, ri)) == 0) 2579 error = md->pmd_read_pmc(cpu, ri, &pm->pm_sc.pm_initial); 2580 critical_exit(); 2581 2582 pmc_restore_cpu_binding(&pb); 2583 2584 po = pm->pm_owner; 2585 2586 /* remove this owner from the global list of SS PMC owners */ 2587 if (PMC_TO_MODE(pm) == PMC_MODE_SS) { 2588 po->po_sscount--; 2589 if (po->po_sscount == 0) { 2590 atomic_subtract_rel_int(&pmc_ss_count, 1); 2591 LIST_REMOVE(po, po_ssnext); 2592 PMCDBG(PMC,OPS,2,"po=%p removed from global list", po); 2593 } 2594 } 2595 2596 return error; 2597 } 2598 2599 2600 #ifdef DEBUG 2601 static const char *pmc_op_to_name[] = { 2602 #undef __PMC_OP 2603 #define __PMC_OP(N, D) #N , 2604 __PMC_OPS() 2605 NULL 2606 }; 2607 #endif 2608 2609 /* 2610 * The syscall interface 2611 */ 2612 2613 #define PMC_GET_SX_XLOCK(...) do { \ 2614 sx_xlock(&pmc_sx); \ 2615 if (pmc_hook == NULL) { \ 2616 sx_xunlock(&pmc_sx); \ 2617 return __VA_ARGS__; \ 2618 } \ 2619 } while (0) 2620 2621 #define PMC_DOWNGRADE_SX() do { \ 2622 sx_downgrade(&pmc_sx); \ 2623 is_sx_downgraded = 1; \ 2624 } while (0) 2625 2626 static int 2627 pmc_syscall_handler(struct thread *td, void *syscall_args) 2628 { 2629 int error, is_sx_downgraded, op; 2630 struct pmc_syscall_args *c; 2631 void *arg; 2632 2633 PMC_GET_SX_XLOCK(ENOSYS); 2634 2635 DROP_GIANT(); 2636 2637 is_sx_downgraded = 0; 2638 2639 c = (struct pmc_syscall_args *) syscall_args; 2640 2641 op = c->pmop_code; 2642 arg = c->pmop_data; 2643 2644 PMCDBG(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, 2645 pmc_op_to_name[op], arg); 2646 2647 error = 0; 2648 atomic_add_int(&pmc_stats.pm_syscalls, 1); 2649 2650 switch(op) 2651 { 2652 2653 2654 /* 2655 * Configure a log file. 2656 * 2657 * XXX This OP will be reworked. 2658 */ 2659 2660 case PMC_OP_CONFIGURELOG: 2661 { 2662 struct proc *p; 2663 struct pmc *pm; 2664 struct pmc_owner *po; 2665 struct pmc_op_configurelog cl; 2666 2667 sx_assert(&pmc_sx, SX_XLOCKED); 2668 2669 if ((error = copyin(arg, &cl, sizeof(cl))) != 0) 2670 break; 2671 2672 /* mark this process as owning a log file */ 2673 p = td->td_proc; 2674 if ((po = pmc_find_owner_descriptor(p)) == NULL) 2675 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { 2676 error = ENOMEM; 2677 break; 2678 } 2679 2680 /* 2681 * If a valid fd was passed in, try to configure that, 2682 * otherwise if 'fd' was less than zero and there was 2683 * a log file configured, flush its buffers and 2684 * de-configure it. 2685 */ 2686 if (cl.pm_logfd >= 0) 2687 error = pmclog_configure_log(po, cl.pm_logfd); 2688 else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { 2689 pmclog_process_closelog(po); 2690 error = pmclog_flush(po); 2691 if (error == 0) { 2692 LIST_FOREACH(pm, &po->po_pmcs, pm_next) 2693 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && 2694 pm->pm_state == PMC_STATE_RUNNING) 2695 pmc_stop(pm); 2696 error = pmclog_deconfigure_log(po); 2697 } 2698 } else 2699 error = EINVAL; 2700 2701 if (error) 2702 break; 2703 } 2704 break; 2705 2706 2707 /* 2708 * Flush a log file. 2709 */ 2710 2711 case PMC_OP_FLUSHLOG: 2712 { 2713 struct pmc_owner *po; 2714 2715 sx_assert(&pmc_sx, SX_XLOCKED); 2716 2717 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { 2718 error = EINVAL; 2719 break; 2720 } 2721 2722 error = pmclog_flush(po); 2723 } 2724 break; 2725 2726 /* 2727 * Retrieve hardware configuration. 2728 */ 2729 2730 case PMC_OP_GETCPUINFO: /* CPU information */ 2731 { 2732 struct pmc_op_getcpuinfo gci; 2733 2734 gci.pm_cputype = md->pmd_cputype; 2735 gci.pm_ncpu = mp_ncpus; 2736 gci.pm_npmc = md->pmd_npmc; 2737 gci.pm_nclass = md->pmd_nclass; 2738 bcopy(md->pmd_classes, &gci.pm_classes, 2739 sizeof(gci.pm_classes)); 2740 error = copyout(&gci, arg, sizeof(gci)); 2741 } 2742 break; 2743 2744 2745 /* 2746 * Get module statistics 2747 */ 2748 2749 case PMC_OP_GETDRIVERSTATS: 2750 { 2751 struct pmc_op_getdriverstats gms; 2752 2753 bcopy(&pmc_stats, &gms, sizeof(gms)); 2754 error = copyout(&gms, arg, sizeof(gms)); 2755 } 2756 break; 2757 2758 2759 /* 2760 * Retrieve module version number 2761 */ 2762 2763 case PMC_OP_GETMODULEVERSION: 2764 { 2765 uint32_t cv, modv; 2766 2767 /* retrieve the client's idea of the ABI version */ 2768 if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) 2769 break; 2770 /* don't service clients newer than our driver */ 2771 modv = PMC_VERSION; 2772 if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { 2773 error = EPROGMISMATCH; 2774 break; 2775 } 2776 error = copyout(&modv, arg, sizeof(int)); 2777 } 2778 break; 2779 2780 2781 /* 2782 * Retrieve the state of all the PMCs on a given 2783 * CPU. 2784 */ 2785 2786 case PMC_OP_GETPMCINFO: 2787 { 2788 uint32_t cpu, n, npmc; 2789 size_t pmcinfo_size; 2790 struct pmc *pm; 2791 struct pmc_info *p, *pmcinfo; 2792 struct pmc_op_getpmcinfo *gpi; 2793 struct pmc_owner *po; 2794 struct pmc_binding pb; 2795 2796 PMC_DOWNGRADE_SX(); 2797 2798 gpi = (struct pmc_op_getpmcinfo *) arg; 2799 2800 if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) 2801 break; 2802 2803 if (cpu >= (unsigned int) mp_ncpus) { 2804 error = EINVAL; 2805 break; 2806 } 2807 2808 if (pmc_cpu_is_disabled(cpu)) { 2809 error = ENXIO; 2810 break; 2811 } 2812 2813 /* switch to CPU 'cpu' */ 2814 pmc_save_cpu_binding(&pb); 2815 pmc_select_cpu(cpu); 2816 2817 npmc = md->pmd_npmc; 2818 2819 pmcinfo_size = npmc * sizeof(struct pmc_info); 2820 MALLOC(pmcinfo, struct pmc_info *, pmcinfo_size, M_PMC, 2821 M_WAITOK); 2822 2823 p = pmcinfo; 2824 2825 for (n = 0; n < md->pmd_npmc; n++, p++) { 2826 2827 if ((error = md->pmd_describe(cpu, n, p, &pm)) != 0) 2828 break; 2829 2830 if (PMC_ROW_DISP_IS_STANDALONE(n)) 2831 p->pm_rowdisp = PMC_DISP_STANDALONE; 2832 else if (PMC_ROW_DISP_IS_THREAD(n)) 2833 p->pm_rowdisp = PMC_DISP_THREAD; 2834 else 2835 p->pm_rowdisp = PMC_DISP_FREE; 2836 2837 p->pm_ownerpid = -1; 2838 2839 if (pm == NULL) /* no PMC associated */ 2840 continue; 2841 2842 po = pm->pm_owner; 2843 2844 KASSERT(po->po_owner != NULL, 2845 ("[pmc,%d] pmc_owner had a null proc pointer", 2846 __LINE__)); 2847 2848 p->pm_ownerpid = po->po_owner->p_pid; 2849 p->pm_mode = PMC_TO_MODE(pm); 2850 p->pm_event = pm->pm_event; 2851 p->pm_flags = pm->pm_flags; 2852 2853 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 2854 p->pm_reloadcount = 2855 pm->pm_sc.pm_reloadcount; 2856 } 2857 2858 pmc_restore_cpu_binding(&pb); 2859 2860 /* now copy out the PMC info collected */ 2861 if (error == 0) 2862 error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); 2863 2864 FREE(pmcinfo, M_PMC); 2865 } 2866 break; 2867 2868 2869 /* 2870 * Set the administrative state of a PMC. I.e. whether 2871 * the PMC is to be used or not. 2872 */ 2873 2874 case PMC_OP_PMCADMIN: 2875 { 2876 int cpu, ri; 2877 enum pmc_state request; 2878 struct pmc_cpu *pc; 2879 struct pmc_hw *phw; 2880 struct pmc_op_pmcadmin pma; 2881 struct pmc_binding pb; 2882 2883 sx_assert(&pmc_sx, SX_XLOCKED); 2884 2885 KASSERT(td == curthread, 2886 ("[pmc,%d] td != curthread", __LINE__)); 2887 2888 error = priv_check(td, PRIV_PMC_MANAGE); 2889 if (error) 2890 break; 2891 2892 if ((error = copyin(arg, &pma, sizeof(pma))) != 0) 2893 break; 2894 2895 cpu = pma.pm_cpu; 2896 2897 if (cpu < 0 || cpu >= mp_ncpus) { 2898 error = EINVAL; 2899 break; 2900 } 2901 2902 if (pmc_cpu_is_disabled(cpu)) { 2903 error = ENXIO; 2904 break; 2905 } 2906 2907 request = pma.pm_state; 2908 2909 if (request != PMC_STATE_DISABLED && 2910 request != PMC_STATE_FREE) { 2911 error = EINVAL; 2912 break; 2913 } 2914 2915 ri = pma.pm_pmc; /* pmc id == row index */ 2916 if (ri < 0 || ri >= (int) md->pmd_npmc) { 2917 error = EINVAL; 2918 break; 2919 } 2920 2921 /* 2922 * We can't disable a PMC with a row-index allocated 2923 * for process virtual PMCs. 2924 */ 2925 2926 if (PMC_ROW_DISP_IS_THREAD(ri) && 2927 request == PMC_STATE_DISABLED) { 2928 error = EBUSY; 2929 break; 2930 } 2931 2932 /* 2933 * otherwise, this PMC on this CPU is either free or 2934 * in system-wide mode. 2935 */ 2936 2937 pmc_save_cpu_binding(&pb); 2938 pmc_select_cpu(cpu); 2939 2940 pc = pmc_pcpu[cpu]; 2941 phw = pc->pc_hwpmcs[ri]; 2942 2943 /* 2944 * XXX do we need some kind of 'forced' disable? 2945 */ 2946 2947 if (phw->phw_pmc == NULL) { 2948 if (request == PMC_STATE_DISABLED && 2949 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { 2950 phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; 2951 PMC_MARK_ROW_STANDALONE(ri); 2952 } else if (request == PMC_STATE_FREE && 2953 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { 2954 phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; 2955 PMC_UNMARK_ROW_STANDALONE(ri); 2956 } 2957 /* other cases are a no-op */ 2958 } else 2959 error = EBUSY; 2960 2961 pmc_restore_cpu_binding(&pb); 2962 } 2963 break; 2964 2965 2966 /* 2967 * Allocate a PMC. 2968 */ 2969 2970 case PMC_OP_PMCALLOCATE: 2971 { 2972 uint32_t caps; 2973 u_int cpu; 2974 int n; 2975 enum pmc_mode mode; 2976 struct pmc *pmc; 2977 struct pmc_hw *phw; 2978 struct pmc_op_pmcallocate pa; 2979 struct pmc_binding pb; 2980 2981 if ((error = copyin(arg, &pa, sizeof(pa))) != 0) 2982 break; 2983 2984 caps = pa.pm_caps; 2985 mode = pa.pm_mode; 2986 cpu = pa.pm_cpu; 2987 2988 if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && 2989 mode != PMC_MODE_TS && mode != PMC_MODE_TC) || 2990 (cpu != (u_int) PMC_CPU_ANY && cpu >= (u_int) mp_ncpus)) { 2991 error = EINVAL; 2992 break; 2993 } 2994 2995 /* 2996 * Virtual PMCs should only ask for a default CPU. 2997 * System mode PMCs need to specify a non-default CPU. 2998 */ 2999 3000 if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || 3001 (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { 3002 error = EINVAL; 3003 break; 3004 } 3005 3006 /* 3007 * Check that a disabled CPU is not being asked for. 3008 */ 3009 3010 if (PMC_IS_SYSTEM_MODE(mode) && pmc_cpu_is_disabled(cpu)) { 3011 error = ENXIO; 3012 break; 3013 } 3014 3015 /* 3016 * Refuse an allocation for a system-wide PMC if this 3017 * process has been jailed, or if this process lacks 3018 * super-user credentials and the sysctl tunable 3019 * 'security.bsd.unprivileged_syspmcs' is zero. 3020 */ 3021 3022 if (PMC_IS_SYSTEM_MODE(mode)) { 3023 if (jailed(curthread->td_ucred)) { 3024 error = EPERM; 3025 break; 3026 } 3027 if (!pmc_unprivileged_syspmcs) { 3028 error = priv_check(curthread, 3029 PRIV_PMC_SYSTEM); 3030 if (error) 3031 break; 3032 } 3033 } 3034 3035 if (error) 3036 break; 3037 3038 /* 3039 * Look for valid values for 'pm_flags' 3040 */ 3041 3042 if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | 3043 PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { 3044 error = EINVAL; 3045 break; 3046 } 3047 3048 /* process logging options are not allowed for system PMCs */ 3049 if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & 3050 (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { 3051 error = EINVAL; 3052 break; 3053 } 3054 3055 /* 3056 * All sampling mode PMCs need to be able to interrupt the 3057 * CPU. 3058 */ 3059 if (PMC_IS_SAMPLING_MODE(mode)) 3060 caps |= PMC_CAP_INTERRUPT; 3061 3062 /* A valid class specifier should have been passed in. */ 3063 for (n = 0; n < md->pmd_nclass; n++) 3064 if (md->pmd_classes[n].pm_class == pa.pm_class) 3065 break; 3066 if (n == md->pmd_nclass) { 3067 error = EINVAL; 3068 break; 3069 } 3070 3071 /* The requested PMC capabilities should be feasible. */ 3072 if ((md->pmd_classes[n].pm_caps & caps) != caps) { 3073 error = EOPNOTSUPP; 3074 break; 3075 } 3076 3077 PMCDBG(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", 3078 pa.pm_ev, caps, mode, cpu); 3079 3080 pmc = pmc_allocate_pmc_descriptor(); 3081 pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, 3082 PMC_ID_INVALID); 3083 pmc->pm_event = pa.pm_ev; 3084 pmc->pm_state = PMC_STATE_FREE; 3085 pmc->pm_caps = caps; 3086 pmc->pm_flags = pa.pm_flags; 3087 3088 /* switch thread to CPU 'cpu' */ 3089 pmc_save_cpu_binding(&pb); 3090 3091 #define PMC_IS_SHAREABLE_PMC(cpu, n) \ 3092 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ 3093 PMC_PHW_FLAG_IS_SHAREABLE) 3094 #define PMC_IS_UNALLOCATED(cpu, n) \ 3095 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) 3096 3097 if (PMC_IS_SYSTEM_MODE(mode)) { 3098 pmc_select_cpu(cpu); 3099 for (n = 0; n < (int) md->pmd_npmc; n++) 3100 if (pmc_can_allocate_row(n, mode) == 0 && 3101 pmc_can_allocate_rowindex( 3102 curthread->td_proc, n, cpu) == 0 && 3103 (PMC_IS_UNALLOCATED(cpu, n) || 3104 PMC_IS_SHAREABLE_PMC(cpu, n)) && 3105 md->pmd_allocate_pmc(cpu, n, pmc, 3106 &pa) == 0) 3107 break; 3108 } else { 3109 /* Process virtual mode */ 3110 for (n = 0; n < (int) md->pmd_npmc; n++) { 3111 if (pmc_can_allocate_row(n, mode) == 0 && 3112 pmc_can_allocate_rowindex( 3113 curthread->td_proc, n, 3114 PMC_CPU_ANY) == 0 && 3115 md->pmd_allocate_pmc(curthread->td_oncpu, 3116 n, pmc, &pa) == 0) 3117 break; 3118 } 3119 } 3120 3121 #undef PMC_IS_UNALLOCATED 3122 #undef PMC_IS_SHAREABLE_PMC 3123 3124 pmc_restore_cpu_binding(&pb); 3125 3126 if (n == (int) md->pmd_npmc) { 3127 pmc_destroy_pmc_descriptor(pmc); 3128 FREE(pmc, M_PMC); 3129 pmc = NULL; 3130 error = EINVAL; 3131 break; 3132 } 3133 3134 /* Fill in the correct value in the ID field */ 3135 pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); 3136 3137 PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", 3138 pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); 3139 3140 /* Process mode PMCs with logging enabled need log files */ 3141 if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) 3142 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; 3143 3144 /* All system mode sampling PMCs require a log file */ 3145 if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) 3146 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; 3147 3148 /* 3149 * Configure global pmc's immediately 3150 */ 3151 3152 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { 3153 3154 pmc_save_cpu_binding(&pb); 3155 pmc_select_cpu(cpu); 3156 3157 phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; 3158 3159 if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || 3160 (error = md->pmd_config_pmc(cpu, n, pmc)) != 0) { 3161 (void) md->pmd_release_pmc(cpu, n, pmc); 3162 pmc_destroy_pmc_descriptor(pmc); 3163 FREE(pmc, M_PMC); 3164 pmc = NULL; 3165 pmc_restore_cpu_binding(&pb); 3166 error = EPERM; 3167 break; 3168 } 3169 3170 pmc_restore_cpu_binding(&pb); 3171 } 3172 3173 pmc->pm_state = PMC_STATE_ALLOCATED; 3174 3175 /* 3176 * mark row disposition 3177 */ 3178 3179 if (PMC_IS_SYSTEM_MODE(mode)) 3180 PMC_MARK_ROW_STANDALONE(n); 3181 else 3182 PMC_MARK_ROW_THREAD(n); 3183 3184 /* 3185 * Register this PMC with the current thread as its owner. 3186 */ 3187 3188 if ((error = 3189 pmc_register_owner(curthread->td_proc, pmc)) != 0) { 3190 pmc_release_pmc_descriptor(pmc); 3191 FREE(pmc, M_PMC); 3192 pmc = NULL; 3193 break; 3194 } 3195 3196 /* 3197 * Return the allocated index. 3198 */ 3199 3200 pa.pm_pmcid = pmc->pm_id; 3201 3202 error = copyout(&pa, arg, sizeof(pa)); 3203 } 3204 break; 3205 3206 3207 /* 3208 * Attach a PMC to a process. 3209 */ 3210 3211 case PMC_OP_PMCATTACH: 3212 { 3213 struct pmc *pm; 3214 struct proc *p; 3215 struct pmc_op_pmcattach a; 3216 3217 sx_assert(&pmc_sx, SX_XLOCKED); 3218 3219 if ((error = copyin(arg, &a, sizeof(a))) != 0) 3220 break; 3221 3222 if (a.pm_pid < 0) { 3223 error = EINVAL; 3224 break; 3225 } else if (a.pm_pid == 0) 3226 a.pm_pid = td->td_proc->p_pid; 3227 3228 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 3229 break; 3230 3231 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 3232 error = EINVAL; 3233 break; 3234 } 3235 3236 /* PMCs may be (re)attached only when allocated or stopped */ 3237 if (pm->pm_state == PMC_STATE_RUNNING) { 3238 error = EBUSY; 3239 break; 3240 } else if (pm->pm_state != PMC_STATE_ALLOCATED && 3241 pm->pm_state != PMC_STATE_STOPPED) { 3242 error = EINVAL; 3243 break; 3244 } 3245 3246 /* lookup pid */ 3247 if ((p = pfind(a.pm_pid)) == NULL) { 3248 error = ESRCH; 3249 break; 3250 } 3251 3252 /* 3253 * Ignore processes that are working on exiting. 3254 */ 3255 if (p->p_flag & P_WEXIT) { 3256 error = ESRCH; 3257 PROC_UNLOCK(p); /* pfind() returns a locked process */ 3258 break; 3259 } 3260 3261 /* 3262 * we are allowed to attach a PMC to a process if 3263 * we can debug it. 3264 */ 3265 error = p_candebug(curthread, p); 3266 3267 PROC_UNLOCK(p); 3268 3269 if (error == 0) 3270 error = pmc_attach_process(p, pm); 3271 } 3272 break; 3273 3274 3275 /* 3276 * Detach an attached PMC from a process. 3277 */ 3278 3279 case PMC_OP_PMCDETACH: 3280 { 3281 struct pmc *pm; 3282 struct proc *p; 3283 struct pmc_op_pmcattach a; 3284 3285 if ((error = copyin(arg, &a, sizeof(a))) != 0) 3286 break; 3287 3288 if (a.pm_pid < 0) { 3289 error = EINVAL; 3290 break; 3291 } else if (a.pm_pid == 0) 3292 a.pm_pid = td->td_proc->p_pid; 3293 3294 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 3295 break; 3296 3297 if ((p = pfind(a.pm_pid)) == NULL) { 3298 error = ESRCH; 3299 break; 3300 } 3301 3302 /* 3303 * Treat processes that are in the process of exiting 3304 * as if they were not present. 3305 */ 3306 3307 if (p->p_flag & P_WEXIT) 3308 error = ESRCH; 3309 3310 PROC_UNLOCK(p); /* pfind() returns a locked process */ 3311 3312 if (error == 0) 3313 error = pmc_detach_process(p, pm); 3314 } 3315 break; 3316 3317 3318 /* 3319 * Retrieve the MSR number associated with the counter 3320 * 'pmc_id'. This allows processes to directly use RDPMC 3321 * instructions to read their PMCs, without the overhead of a 3322 * system call. 3323 */ 3324 3325 case PMC_OP_PMCGETMSR: 3326 { 3327 int ri; 3328 struct pmc *pm; 3329 struct pmc_target *pt; 3330 struct pmc_op_getmsr gm; 3331 3332 PMC_DOWNGRADE_SX(); 3333 3334 /* CPU has no 'GETMSR' support */ 3335 if (md->pmd_get_msr == NULL) { 3336 error = ENOSYS; 3337 break; 3338 } 3339 3340 if ((error = copyin(arg, &gm, sizeof(gm))) != 0) 3341 break; 3342 3343 if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) 3344 break; 3345 3346 /* 3347 * The allocated PMC has to be a process virtual PMC, 3348 * i.e., of type MODE_T[CS]. Global PMCs can only be 3349 * read using the PMCREAD operation since they may be 3350 * allocated on a different CPU than the one we could 3351 * be running on at the time of the RDPMC instruction. 3352 * 3353 * The GETMSR operation is not allowed for PMCs that 3354 * are inherited across processes. 3355 */ 3356 3357 if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || 3358 (pm->pm_flags & PMC_F_DESCENDANTS)) { 3359 error = EINVAL; 3360 break; 3361 } 3362 3363 /* 3364 * It only makes sense to use a RDPMC (or its 3365 * equivalent instruction on non-x86 architectures) on 3366 * a process that has allocated and attached a PMC to 3367 * itself. Conversely the PMC is only allowed to have 3368 * one process attached to it -- its owner. 3369 */ 3370 3371 if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || 3372 LIST_NEXT(pt, pt_next) != NULL || 3373 pt->pt_process->pp_proc != pm->pm_owner->po_owner) { 3374 error = EINVAL; 3375 break; 3376 } 3377 3378 ri = PMC_TO_ROWINDEX(pm); 3379 3380 if ((error = (*md->pmd_get_msr)(ri, &gm.pm_msr)) < 0) 3381 break; 3382 3383 if ((error = copyout(&gm, arg, sizeof(gm))) < 0) 3384 break; 3385 3386 /* 3387 * Mark our process as using MSRs. Update machine 3388 * state using a forced context switch. 3389 */ 3390 3391 pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; 3392 pmc_force_context_switch(); 3393 3394 } 3395 break; 3396 3397 /* 3398 * Release an allocated PMC 3399 */ 3400 3401 case PMC_OP_PMCRELEASE: 3402 { 3403 pmc_id_t pmcid; 3404 struct pmc *pm; 3405 struct pmc_owner *po; 3406 struct pmc_op_simple sp; 3407 3408 /* 3409 * Find PMC pointer for the named PMC. 3410 * 3411 * Use pmc_release_pmc_descriptor() to switch off the 3412 * PMC, remove all its target threads, and remove the 3413 * PMC from its owner's list. 3414 * 3415 * Remove the owner record if this is the last PMC 3416 * owned. 3417 * 3418 * Free up space. 3419 */ 3420 3421 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3422 break; 3423 3424 pmcid = sp.pm_pmcid; 3425 3426 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3427 break; 3428 3429 po = pm->pm_owner; 3430 pmc_release_pmc_descriptor(pm); 3431 pmc_maybe_remove_owner(po); 3432 3433 FREE(pm, M_PMC); 3434 } 3435 break; 3436 3437 3438 /* 3439 * Read and/or write a PMC. 3440 */ 3441 3442 case PMC_OP_PMCRW: 3443 { 3444 uint32_t cpu, ri; 3445 struct pmc *pm; 3446 struct pmc_op_pmcrw *pprw; 3447 struct pmc_op_pmcrw prw; 3448 struct pmc_binding pb; 3449 pmc_value_t oldvalue; 3450 3451 PMC_DOWNGRADE_SX(); 3452 3453 if ((error = copyin(arg, &prw, sizeof(prw))) != 0) 3454 break; 3455 3456 ri = 0; 3457 PMCDBG(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, 3458 prw.pm_flags); 3459 3460 /* must have at least one flag set */ 3461 if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { 3462 error = EINVAL; 3463 break; 3464 } 3465 3466 /* locate pmc descriptor */ 3467 if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) 3468 break; 3469 3470 /* Can't read a PMC that hasn't been started. */ 3471 if (pm->pm_state != PMC_STATE_ALLOCATED && 3472 pm->pm_state != PMC_STATE_STOPPED && 3473 pm->pm_state != PMC_STATE_RUNNING) { 3474 error = EINVAL; 3475 break; 3476 } 3477 3478 /* writing a new value is allowed only for 'STOPPED' pmcs */ 3479 if (pm->pm_state == PMC_STATE_RUNNING && 3480 (prw.pm_flags & PMC_F_NEWVALUE)) { 3481 error = EBUSY; 3482 break; 3483 } 3484 3485 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { 3486 3487 /* 3488 * If this PMC is attached to its owner (i.e., 3489 * the process requesting this operation) and 3490 * is running, then attempt to get an 3491 * upto-date reading from hardware for a READ. 3492 * Writes are only allowed when the PMC is 3493 * stopped, so only update the saved value 3494 * field. 3495 * 3496 * If the PMC is not running, or is not 3497 * attached to its owner, read/write to the 3498 * savedvalue field. 3499 */ 3500 3501 ri = PMC_TO_ROWINDEX(pm); 3502 3503 mtx_pool_lock_spin(pmc_mtxpool, pm); 3504 cpu = curthread->td_oncpu; 3505 3506 if (prw.pm_flags & PMC_F_OLDVALUE) { 3507 if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && 3508 (pm->pm_state == PMC_STATE_RUNNING)) 3509 error = (*md->pmd_read_pmc)(cpu, ri, 3510 &oldvalue); 3511 else 3512 oldvalue = pm->pm_gv.pm_savedvalue; 3513 } 3514 if (prw.pm_flags & PMC_F_NEWVALUE) 3515 pm->pm_gv.pm_savedvalue = prw.pm_value; 3516 3517 mtx_pool_unlock_spin(pmc_mtxpool, pm); 3518 3519 } else { /* System mode PMCs */ 3520 cpu = PMC_TO_CPU(pm); 3521 ri = PMC_TO_ROWINDEX(pm); 3522 3523 if (pmc_cpu_is_disabled(cpu)) { 3524 error = ENXIO; 3525 break; 3526 } 3527 3528 /* move this thread to CPU 'cpu' */ 3529 pmc_save_cpu_binding(&pb); 3530 pmc_select_cpu(cpu); 3531 3532 critical_enter(); 3533 /* save old value */ 3534 if (prw.pm_flags & PMC_F_OLDVALUE) 3535 if ((error = (*md->pmd_read_pmc)(cpu, ri, 3536 &oldvalue))) 3537 goto error; 3538 /* write out new value */ 3539 if (prw.pm_flags & PMC_F_NEWVALUE) 3540 error = (*md->pmd_write_pmc)(cpu, ri, 3541 prw.pm_value); 3542 error: 3543 critical_exit(); 3544 pmc_restore_cpu_binding(&pb); 3545 if (error) 3546 break; 3547 } 3548 3549 pprw = (struct pmc_op_pmcrw *) arg; 3550 3551 #ifdef DEBUG 3552 if (prw.pm_flags & PMC_F_NEWVALUE) 3553 PMCDBG(PMC,OPS,2, "rw id=%d new %jx -> old %jx", 3554 ri, prw.pm_value, oldvalue); 3555 else if (prw.pm_flags & PMC_F_OLDVALUE) 3556 PMCDBG(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue); 3557 #endif 3558 3559 /* return old value if requested */ 3560 if (prw.pm_flags & PMC_F_OLDVALUE) 3561 if ((error = copyout(&oldvalue, &pprw->pm_value, 3562 sizeof(prw.pm_value)))) 3563 break; 3564 3565 } 3566 break; 3567 3568 3569 /* 3570 * Set the sampling rate for a sampling mode PMC and the 3571 * initial count for a counting mode PMC. 3572 */ 3573 3574 case PMC_OP_PMCSETCOUNT: 3575 { 3576 struct pmc *pm; 3577 struct pmc_op_pmcsetcount sc; 3578 3579 PMC_DOWNGRADE_SX(); 3580 3581 if ((error = copyin(arg, &sc, sizeof(sc))) != 0) 3582 break; 3583 3584 if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) 3585 break; 3586 3587 if (pm->pm_state == PMC_STATE_RUNNING) { 3588 error = EBUSY; 3589 break; 3590 } 3591 3592 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 3593 pm->pm_sc.pm_reloadcount = sc.pm_count; 3594 else 3595 pm->pm_sc.pm_initial = sc.pm_count; 3596 } 3597 break; 3598 3599 3600 /* 3601 * Start a PMC. 3602 */ 3603 3604 case PMC_OP_PMCSTART: 3605 { 3606 pmc_id_t pmcid; 3607 struct pmc *pm; 3608 struct pmc_op_simple sp; 3609 3610 sx_assert(&pmc_sx, SX_XLOCKED); 3611 3612 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3613 break; 3614 3615 pmcid = sp.pm_pmcid; 3616 3617 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3618 break; 3619 3620 KASSERT(pmcid == pm->pm_id, 3621 ("[pmc,%d] pmcid %x != id %x", __LINE__, 3622 pm->pm_id, pmcid)); 3623 3624 if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ 3625 break; 3626 else if (pm->pm_state != PMC_STATE_STOPPED && 3627 pm->pm_state != PMC_STATE_ALLOCATED) { 3628 error = EINVAL; 3629 break; 3630 } 3631 3632 error = pmc_start(pm); 3633 } 3634 break; 3635 3636 3637 /* 3638 * Stop a PMC. 3639 */ 3640 3641 case PMC_OP_PMCSTOP: 3642 { 3643 pmc_id_t pmcid; 3644 struct pmc *pm; 3645 struct pmc_op_simple sp; 3646 3647 PMC_DOWNGRADE_SX(); 3648 3649 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3650 break; 3651 3652 pmcid = sp.pm_pmcid; 3653 3654 /* 3655 * Mark the PMC as inactive and invoke the MD stop 3656 * routines if needed. 3657 */ 3658 3659 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3660 break; 3661 3662 KASSERT(pmcid == pm->pm_id, 3663 ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, 3664 pm->pm_id, pmcid)); 3665 3666 if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ 3667 break; 3668 else if (pm->pm_state != PMC_STATE_RUNNING) { 3669 error = EINVAL; 3670 break; 3671 } 3672 3673 error = pmc_stop(pm); 3674 } 3675 break; 3676 3677 3678 /* 3679 * Write a user supplied value to the log file. 3680 */ 3681 3682 case PMC_OP_WRITELOG: 3683 { 3684 struct pmc_op_writelog wl; 3685 struct pmc_owner *po; 3686 3687 PMC_DOWNGRADE_SX(); 3688 3689 if ((error = copyin(arg, &wl, sizeof(wl))) != 0) 3690 break; 3691 3692 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { 3693 error = EINVAL; 3694 break; 3695 } 3696 3697 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { 3698 error = EINVAL; 3699 break; 3700 } 3701 3702 error = pmclog_process_userlog(po, &wl); 3703 } 3704 break; 3705 3706 3707 default: 3708 error = EINVAL; 3709 break; 3710 } 3711 3712 if (is_sx_downgraded) 3713 sx_sunlock(&pmc_sx); 3714 else 3715 sx_xunlock(&pmc_sx); 3716 3717 if (error) 3718 atomic_add_int(&pmc_stats.pm_syscall_errors, 1); 3719 3720 PICKUP_GIANT(); 3721 3722 return error; 3723 } 3724 3725 /* 3726 * Helper functions 3727 */ 3728 3729 3730 /* 3731 * Mark the thread as needing callchain capture and post an AST. The 3732 * actual callchain capture will be done in a context where it is safe 3733 * to take page faults. 3734 */ 3735 3736 static void 3737 pmc_post_callchain_ast(void) 3738 { 3739 struct thread *td; 3740 3741 td = curthread; 3742 3743 /* 3744 * Mark this thread as needing processing in ast(). 3745 * td->td_pflags will be safe to touch as the process was in 3746 * user space when it was interrupted. 3747 */ 3748 td->td_pflags |= TDP_CALLCHAIN; 3749 3750 /* 3751 * Again, since we've entered this function directly from 3752 * userland, `td' is guaranteed to be not locked by this CPU, 3753 * so its safe to try acquire the thread lock even though we 3754 * are executing in an NMI context. We need to acquire this 3755 * lock before touching `td_flags' because other CPUs may be 3756 * in the process of touching this field. 3757 */ 3758 thread_lock(td); 3759 td->td_flags |= TDF_ASTPENDING; 3760 thread_unlock(td); 3761 3762 return; 3763 } 3764 3765 /* 3766 * Interrupt processing. 3767 * 3768 * Find a free slot in the per-cpu array of samples and capture the 3769 * current callchain there. If a sample was successfully added, a bit 3770 * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook 3771 * needs to be invoked from the clock handler. 3772 * 3773 * This function is meant to be called from an NMI handler. It cannot 3774 * use any of the locking primitives supplied by the OS. 3775 */ 3776 3777 int 3778 pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf, 3779 int inuserspace) 3780 { 3781 int error, callchaindepth; 3782 struct thread *td; 3783 struct pmc_sample *ps; 3784 struct pmc_samplebuffer *psb; 3785 3786 error = 0; 3787 3788 /* 3789 * Allocate space for a sample buffer. 3790 */ 3791 psb = pmc_pcpu[cpu]->pc_sb; 3792 3793 ps = psb->ps_write; 3794 if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ 3795 pm->pm_stalled = 1; 3796 atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1); 3797 PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", 3798 cpu, pm, (void *) tf, inuserspace, 3799 (int) (psb->ps_write - psb->ps_samples), 3800 (int) (psb->ps_read - psb->ps_samples)); 3801 error = ENOMEM; 3802 goto done; 3803 } 3804 3805 3806 /* Fill in entry. */ 3807 PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, 3808 (void *) tf, inuserspace, 3809 (int) (psb->ps_write - psb->ps_samples), 3810 (int) (psb->ps_read - psb->ps_samples)); 3811 3812 atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */ 3813 ps->ps_pmc = pm; 3814 if ((td = curthread) && td->td_proc) 3815 ps->ps_pid = td->td_proc->p_pid; 3816 else 3817 ps->ps_pid = -1; 3818 ps->ps_cpu = cpu; 3819 ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; 3820 3821 callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? 3822 pmc_callchaindepth : 1; 3823 3824 if (callchaindepth == 1) 3825 ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); 3826 else { 3827 /* 3828 * Kernel stack traversals can be done immediately, 3829 * while we defer to an AST for user space traversals. 3830 */ 3831 if (!inuserspace) 3832 callchaindepth = 3833 pmc_save_kernel_callchain(ps->ps_pc, 3834 callchaindepth, tf); 3835 else { 3836 pmc_post_callchain_ast(); 3837 callchaindepth = PMC_SAMPLE_INUSE; 3838 } 3839 } 3840 3841 ps->ps_nsamples = callchaindepth; /* mark entry as in use */ 3842 3843 /* increment write pointer, modulo ring buffer size */ 3844 ps++; 3845 if (ps == psb->ps_fence) 3846 psb->ps_write = psb->ps_samples; 3847 else 3848 psb->ps_write = ps; 3849 3850 done: 3851 /* mark CPU as needing processing */ 3852 atomic_set_rel_int(&pmc_cpumask, (1 << cpu)); 3853 3854 return (error); 3855 } 3856 3857 /* 3858 * Capture a user call chain. This function will be called from ast() 3859 * before control returns to userland and before the process gets 3860 * rescheduled. 3861 */ 3862 3863 static void 3864 pmc_capture_user_callchain(int cpu, struct trapframe *tf) 3865 { 3866 int i; 3867 struct pmc *pm; 3868 struct pmc_sample *ps; 3869 struct pmc_samplebuffer *psb; 3870 3871 psb = pmc_pcpu[cpu]->pc_sb; 3872 3873 /* 3874 * Iterate through all deferred callchain requests. 3875 */ 3876 3877 for (i = 0; i < pmc_nsamples; i++) { 3878 3879 ps = &psb->ps_samples[i]; 3880 if (ps->ps_nsamples != PMC_SAMPLE_INUSE) 3881 continue; 3882 3883 pm = ps->ps_pmc; 3884 3885 KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, 3886 ("[pmc,%d] Retrieving callchain for PMC that doesn't " 3887 "want it", __LINE__)); 3888 3889 /* 3890 * Retrieve the callchain and mark the sample buffer 3891 * as 'processable' by the timer tick sweep code. 3892 */ 3893 ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, 3894 pmc_callchaindepth, tf); 3895 } 3896 3897 return; 3898 } 3899 3900 3901 /* 3902 * Process saved PC samples. 3903 */ 3904 3905 static void 3906 pmc_process_samples(int cpu) 3907 { 3908 int n, ri; 3909 struct pmc *pm; 3910 struct thread *td; 3911 struct pmc_owner *po; 3912 struct pmc_sample *ps; 3913 struct pmc_samplebuffer *psb; 3914 3915 KASSERT(PCPU_GET(cpuid) == cpu, 3916 ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, 3917 PCPU_GET(cpuid), cpu)); 3918 3919 psb = pmc_pcpu[cpu]->pc_sb; 3920 3921 for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ 3922 3923 ps = psb->ps_read; 3924 if (ps->ps_nsamples == PMC_SAMPLE_FREE) 3925 break; 3926 if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { 3927 /* Need a rescan at a later time. */ 3928 atomic_set_rel_int(&pmc_cpumask, (1 << cpu)); 3929 break; 3930 } 3931 3932 pm = ps->ps_pmc; 3933 po = pm->pm_owner; 3934 3935 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), 3936 ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, 3937 pm, PMC_TO_MODE(pm))); 3938 3939 /* Ignore PMCs that have been switched off */ 3940 if (pm->pm_state != PMC_STATE_RUNNING) 3941 goto entrydone; 3942 3943 PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, 3944 pm, ps->ps_nsamples, ps->ps_flags, 3945 (int) (psb->ps_write - psb->ps_samples), 3946 (int) (psb->ps_read - psb->ps_samples)); 3947 3948 /* 3949 * If this is a process-mode PMC that is attached to 3950 * its owner, and if the PC is in user mode, update 3951 * profiling statistics like timer-based profiling 3952 * would have done. 3953 */ 3954 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { 3955 if (ps->ps_flags & PMC_CC_F_USERSPACE) { 3956 td = FIRST_THREAD_IN_PROC(po->po_owner); 3957 addupc_intr(td, ps->ps_pc[0], 1); 3958 } 3959 goto entrydone; 3960 } 3961 3962 /* 3963 * Otherwise, this is either a sampling mode PMC that 3964 * is attached to a different process than its owner, 3965 * or a system-wide sampling PMC. Dispatch a log 3966 * entry to the PMC's owner process. 3967 */ 3968 3969 pmclog_process_callchain(pm, ps); 3970 3971 entrydone: 3972 ps->ps_nsamples = 0; /* mark entry as free */ 3973 atomic_subtract_rel_32(&pm->pm_runcount, 1); 3974 3975 /* increment read pointer, modulo sample size */ 3976 if (++ps == psb->ps_fence) 3977 psb->ps_read = psb->ps_samples; 3978 else 3979 psb->ps_read = ps; 3980 } 3981 3982 atomic_add_int(&pmc_stats.pm_log_sweeps, 1); 3983 3984 /* Do not re-enable stalled PMCs if we failed to process any samples */ 3985 if (n == 0) 3986 return; 3987 3988 /* 3989 * Restart any stalled sampling PMCs on this CPU. 3990 * 3991 * If the NMI handler sets the pm_stalled field of a PMC after 3992 * the check below, we'll end up processing the stalled PMC at 3993 * the next hardclock tick. 3994 */ 3995 for (n = 0; n < md->pmd_npmc; n++) { 3996 (void) (*md->pmd_get_config)(cpu,n,&pm); 3997 if (pm == NULL || /* !cfg'ed */ 3998 pm->pm_state != PMC_STATE_RUNNING || /* !active */ 3999 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */ 4000 pm->pm_stalled == 0) /* !stalled */ 4001 continue; 4002 4003 pm->pm_stalled = 0; 4004 ri = PMC_TO_ROWINDEX(pm); 4005 (*md->pmd_start_pmc)(cpu, ri); 4006 } 4007 } 4008 4009 /* 4010 * Event handlers. 4011 */ 4012 4013 /* 4014 * Handle a process exit. 4015 * 4016 * Remove this process from all hash tables. If this process 4017 * owned any PMCs, turn off those PMCs and deallocate them, 4018 * removing any associations with target processes. 4019 * 4020 * This function will be called by the last 'thread' of a 4021 * process. 4022 * 4023 * XXX This eventhandler gets called early in the exit process. 4024 * Consider using a 'hook' invocation from thread_exit() or equivalent 4025 * spot. Another negative is that kse_exit doesn't seem to call 4026 * exit1() [??]. 4027 * 4028 */ 4029 4030 static void 4031 pmc_process_exit(void *arg __unused, struct proc *p) 4032 { 4033 int is_using_hwpmcs; 4034 int cpu; 4035 unsigned int ri; 4036 struct pmc *pm; 4037 struct pmc_process *pp; 4038 struct pmc_owner *po; 4039 pmc_value_t newvalue, tmp; 4040 4041 PROC_LOCK(p); 4042 is_using_hwpmcs = p->p_flag & P_HWPMC; 4043 PROC_UNLOCK(p); 4044 4045 /* 4046 * Log a sysexit event to all SS PMC owners. 4047 */ 4048 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 4049 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 4050 pmclog_process_sysexit(po, p->p_pid); 4051 4052 if (!is_using_hwpmcs) 4053 return; 4054 4055 PMC_GET_SX_XLOCK(); 4056 PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, 4057 p->p_comm); 4058 4059 /* 4060 * Since this code is invoked by the last thread in an exiting 4061 * process, we would have context switched IN at some prior 4062 * point. However, with PREEMPTION, kernel mode context 4063 * switches may happen any time, so we want to disable a 4064 * context switch OUT till we get any PMCs targetting this 4065 * process off the hardware. 4066 * 4067 * We also need to atomically remove this process' 4068 * entry from our target process hash table, using 4069 * PMC_FLAG_REMOVE. 4070 */ 4071 PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid, 4072 p->p_comm); 4073 4074 critical_enter(); /* no preemption */ 4075 4076 cpu = curthread->td_oncpu; 4077 4078 if ((pp = pmc_find_process_descriptor(p, 4079 PMC_FLAG_REMOVE)) != NULL) { 4080 4081 PMCDBG(PRC,EXT,2, 4082 "process-exit proc=%p pmc-process=%p", p, pp); 4083 4084 /* 4085 * The exiting process could the target of 4086 * some PMCs which will be running on 4087 * currently executing CPU. 4088 * 4089 * We need to turn these PMCs off like we 4090 * would do at context switch OUT time. 4091 */ 4092 for (ri = 0; ri < md->pmd_npmc; ri++) { 4093 4094 /* 4095 * Pick up the pmc pointer from hardware 4096 * state similar to the CSW_OUT code. 4097 */ 4098 pm = NULL; 4099 (void) (*md->pmd_get_config)(cpu, ri, &pm); 4100 4101 PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm); 4102 4103 if (pm == NULL || 4104 !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) 4105 continue; 4106 4107 PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p " 4108 "state=%d", ri, pp->pp_pmcs[ri].pp_pmc, 4109 pm, pm->pm_state); 4110 4111 KASSERT(PMC_TO_ROWINDEX(pm) == ri, 4112 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)", 4113 __LINE__, PMC_TO_ROWINDEX(pm), ri)); 4114 4115 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc, 4116 ("[pmc,%d] pm %p != pp_pmcs[%d] %p", 4117 __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc)); 4118 4119 (void) md->pmd_stop_pmc(cpu, ri); 4120 4121 KASSERT(pm->pm_runcount > 0, 4122 ("[pmc,%d] bad runcount ri %d rc %d", 4123 __LINE__, ri, pm->pm_runcount)); 4124 4125 /* Stop hardware only if it is actually running */ 4126 if (pm->pm_state == PMC_STATE_RUNNING && 4127 pm->pm_stalled == 0) { 4128 md->pmd_read_pmc(cpu, ri, &newvalue); 4129 tmp = newvalue - 4130 PMC_PCPU_SAVED(cpu,ri); 4131 4132 mtx_pool_lock_spin(pmc_mtxpool, pm); 4133 pm->pm_gv.pm_savedvalue += tmp; 4134 pp->pp_pmcs[ri].pp_pmcval += tmp; 4135 mtx_pool_unlock_spin(pmc_mtxpool, pm); 4136 } 4137 4138 atomic_subtract_rel_32(&pm->pm_runcount,1); 4139 4140 KASSERT((int) pm->pm_runcount >= 0, 4141 ("[pmc,%d] runcount is %d", __LINE__, ri)); 4142 4143 (void) md->pmd_config_pmc(cpu, ri, NULL); 4144 } 4145 4146 /* 4147 * Inform the MD layer of this pseudo "context switch 4148 * out" 4149 */ 4150 (void) md->pmd_switch_out(pmc_pcpu[cpu], pp); 4151 4152 critical_exit(); /* ok to be pre-empted now */ 4153 4154 /* 4155 * Unlink this process from the PMCs that are 4156 * targetting it. This will send a signal to 4157 * all PMC owner's whose PMCs are orphaned. 4158 * 4159 * Log PMC value at exit time if requested. 4160 */ 4161 for (ri = 0; ri < md->pmd_npmc; ri++) 4162 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) { 4163 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && 4164 PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm))) 4165 pmclog_process_procexit(pm, pp); 4166 pmc_unlink_target_process(pm, pp); 4167 } 4168 FREE(pp, M_PMC); 4169 4170 } else 4171 critical_exit(); /* pp == NULL */ 4172 4173 4174 /* 4175 * If the process owned PMCs, free them up and free up 4176 * memory. 4177 */ 4178 if ((po = pmc_find_owner_descriptor(p)) != NULL) { 4179 pmc_remove_owner(po); 4180 pmc_destroy_owner_descriptor(po); 4181 } 4182 4183 sx_xunlock(&pmc_sx); 4184 } 4185 4186 /* 4187 * Handle a process fork. 4188 * 4189 * If the parent process 'p1' is under HWPMC monitoring, then copy 4190 * over any attached PMCs that have 'do_descendants' semantics. 4191 */ 4192 4193 static void 4194 pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc, 4195 int flags) 4196 { 4197 int is_using_hwpmcs; 4198 unsigned int ri; 4199 uint32_t do_descendants; 4200 struct pmc *pm; 4201 struct pmc_owner *po; 4202 struct pmc_process *ppnew, *ppold; 4203 4204 (void) flags; /* unused parameter */ 4205 4206 PROC_LOCK(p1); 4207 is_using_hwpmcs = p1->p_flag & P_HWPMC; 4208 PROC_UNLOCK(p1); 4209 4210 /* 4211 * If there are system-wide sampling PMCs active, we need to 4212 * log all fork events to their owner's logs. 4213 */ 4214 4215 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 4216 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 4217 pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); 4218 4219 if (!is_using_hwpmcs) 4220 return; 4221 4222 PMC_GET_SX_XLOCK(); 4223 PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, 4224 p1->p_pid, p1->p_comm, newproc); 4225 4226 /* 4227 * If the parent process (curthread->td_proc) is a 4228 * target of any PMCs, look for PMCs that are to be 4229 * inherited, and link these into the new process 4230 * descriptor. 4231 */ 4232 if ((ppold = pmc_find_process_descriptor(curthread->td_proc, 4233 PMC_FLAG_NONE)) == NULL) 4234 goto done; /* nothing to do */ 4235 4236 do_descendants = 0; 4237 for (ri = 0; ri < md->pmd_npmc; ri++) 4238 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) 4239 do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; 4240 if (do_descendants == 0) /* nothing to do */ 4241 goto done; 4242 4243 /* allocate a descriptor for the new process */ 4244 if ((ppnew = pmc_find_process_descriptor(newproc, 4245 PMC_FLAG_ALLOCATE)) == NULL) 4246 goto done; 4247 4248 /* 4249 * Run through all PMCs that were targeting the old process 4250 * and which specified F_DESCENDANTS and attach them to the 4251 * new process. 4252 * 4253 * Log the fork event to all owners of PMCs attached to this 4254 * process, if not already logged. 4255 */ 4256 for (ri = 0; ri < md->pmd_npmc; ri++) 4257 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && 4258 (pm->pm_flags & PMC_F_DESCENDANTS)) { 4259 pmc_link_target_process(pm, ppnew); 4260 po = pm->pm_owner; 4261 if (po->po_sscount == 0 && 4262 po->po_flags & PMC_PO_OWNS_LOGFILE) 4263 pmclog_process_procfork(po, p1->p_pid, 4264 newproc->p_pid); 4265 } 4266 4267 /* 4268 * Now mark the new process as being tracked by this driver. 4269 */ 4270 PROC_LOCK(newproc); 4271 newproc->p_flag |= P_HWPMC; 4272 PROC_UNLOCK(newproc); 4273 4274 done: 4275 sx_xunlock(&pmc_sx); 4276 } 4277 4278 4279 /* 4280 * initialization 4281 */ 4282 4283 static const char *pmc_name_of_pmcclass[] = { 4284 #undef __PMC_CLASS 4285 #define __PMC_CLASS(N) #N , 4286 __PMC_CLASSES() 4287 }; 4288 4289 static int 4290 pmc_initialize(void) 4291 { 4292 int cpu, error, n; 4293 struct pmc_binding pb; 4294 struct pmc_sample *ps; 4295 struct pmc_samplebuffer *sb; 4296 4297 md = NULL; 4298 error = 0; 4299 4300 #ifdef DEBUG 4301 /* parse debug flags first */ 4302 if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", 4303 pmc_debugstr, sizeof(pmc_debugstr))) 4304 pmc_debugflags_parse(pmc_debugstr, 4305 pmc_debugstr+strlen(pmc_debugstr)); 4306 #endif 4307 4308 PMCDBG(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); 4309 4310 /* check kernel version */ 4311 if (pmc_kernel_version != PMC_VERSION) { 4312 if (pmc_kernel_version == 0) 4313 printf("hwpmc: this kernel has not been compiled with " 4314 "'options HWPMC_HOOKS'.\n"); 4315 else 4316 printf("hwpmc: kernel version (0x%x) does not match " 4317 "module version (0x%x).\n", pmc_kernel_version, 4318 PMC_VERSION); 4319 return EPROGMISMATCH; 4320 } 4321 4322 /* 4323 * check sysctl parameters 4324 */ 4325 4326 if (pmc_hashsize <= 0) { 4327 (void) printf("hwpmc: tunable \"hashsize\"=%d must be " 4328 "greater than zero.\n", pmc_hashsize); 4329 pmc_hashsize = PMC_HASH_SIZE; 4330 } 4331 4332 if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { 4333 (void) printf("hwpmc: tunable \"nsamples\"=%d out of " 4334 "range.\n", pmc_nsamples); 4335 pmc_nsamples = PMC_NSAMPLES; 4336 } 4337 4338 if (pmc_callchaindepth <= 0 || 4339 pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { 4340 (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " 4341 "range.\n", pmc_callchaindepth); 4342 pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; 4343 } 4344 4345 md = pmc_md_initialize(); 4346 4347 if (md == NULL || md->pmd_init == NULL) 4348 return ENOSYS; 4349 4350 /* allocate space for the per-cpu array */ 4351 MALLOC(pmc_pcpu, struct pmc_cpu **, mp_ncpus * sizeof(struct pmc_cpu *), 4352 M_PMC, M_WAITOK|M_ZERO); 4353 4354 /* per-cpu 'saved values' for managing process-mode PMCs */ 4355 MALLOC(pmc_pcpu_saved, pmc_value_t *, 4356 sizeof(pmc_value_t) * mp_ncpus * md->pmd_npmc, M_PMC, M_WAITOK); 4357 4358 /* perform cpu dependent initialization */ 4359 pmc_save_cpu_binding(&pb); 4360 for (cpu = 0; cpu < mp_ncpus; cpu++) { 4361 if (pmc_cpu_is_disabled(cpu)) 4362 continue; 4363 pmc_select_cpu(cpu); 4364 if ((error = md->pmd_init(cpu)) != 0) 4365 break; 4366 } 4367 pmc_restore_cpu_binding(&pb); 4368 4369 if (error != 0) 4370 return error; 4371 4372 /* allocate space for the sample array */ 4373 for (cpu = 0; cpu < mp_ncpus; cpu++) { 4374 if (pmc_cpu_is_disabled(cpu)) 4375 continue; 4376 MALLOC(sb, struct pmc_samplebuffer *, 4377 sizeof(struct pmc_samplebuffer) + 4378 pmc_nsamples * sizeof(struct pmc_sample), M_PMC, 4379 M_WAITOK|M_ZERO); 4380 4381 sb->ps_read = sb->ps_write = sb->ps_samples; 4382 sb->ps_fence = sb->ps_samples + pmc_nsamples; 4383 KASSERT(pmc_pcpu[cpu] != NULL, 4384 ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); 4385 4386 MALLOC(sb->ps_callchains, uintptr_t *, 4387 pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), 4388 M_PMC, M_WAITOK|M_ZERO); 4389 4390 for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) 4391 ps->ps_pc = sb->ps_callchains + 4392 (n * pmc_callchaindepth); 4393 4394 pmc_pcpu[cpu]->pc_sb = sb; 4395 } 4396 4397 /* allocate space for the row disposition array */ 4398 pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, 4399 M_PMC, M_WAITOK|M_ZERO); 4400 4401 KASSERT(pmc_pmcdisp != NULL, 4402 ("[pmc,%d] pmcdisp allocation returned NULL", __LINE__)); 4403 4404 /* mark all PMCs as available */ 4405 for (n = 0; n < (int) md->pmd_npmc; n++) 4406 PMC_MARK_ROW_FREE(n); 4407 4408 /* allocate thread hash tables */ 4409 pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, 4410 &pmc_ownerhashmask); 4411 4412 pmc_processhash = hashinit(pmc_hashsize, M_PMC, 4413 &pmc_processhashmask); 4414 mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf", 4415 MTX_SPIN); 4416 4417 LIST_INIT(&pmc_ss_owners); 4418 pmc_ss_count = 0; 4419 4420 /* allocate a pool of spin mutexes */ 4421 pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size, 4422 MTX_SPIN); 4423 4424 PMCDBG(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " 4425 "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, 4426 pmc_processhash, pmc_processhashmask); 4427 4428 /* register process {exit,fork,exec} handlers */ 4429 pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, 4430 pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); 4431 pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, 4432 pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); 4433 4434 /* initialize logging */ 4435 pmclog_initialize(); 4436 4437 /* set hook functions */ 4438 pmc_intr = md->pmd_intr; 4439 pmc_hook = pmc_hook_handler; 4440 4441 if (error == 0) { 4442 printf(PMC_MODULE_NAME ":"); 4443 for (n = 0; n < (int) md->pmd_nclass; n++) { 4444 printf(" %s/%d/0x%b", 4445 pmc_name_of_pmcclass[md->pmd_classes[n].pm_class], 4446 md->pmd_nclasspmcs[n], 4447 md->pmd_classes[n].pm_caps, 4448 "\20" 4449 "\1INT\2USR\3SYS\4EDG\5THR" 4450 "\6REA\7WRI\10INV\11QUA\12PRC" 4451 "\13TAG\14CSC"); 4452 } 4453 printf("\n"); 4454 } 4455 4456 return error; 4457 } 4458 4459 /* prepare to be unloaded */ 4460 static void 4461 pmc_cleanup(void) 4462 { 4463 int cpu; 4464 struct pmc_ownerhash *ph; 4465 struct pmc_owner *po, *tmp; 4466 struct pmc_binding pb; 4467 #ifdef DEBUG 4468 struct pmc_processhash *prh; 4469 #endif 4470 4471 PMCDBG(MOD,INI,0, "%s", "cleanup"); 4472 4473 /* switch off sampling */ 4474 atomic_store_rel_int(&pmc_cpumask, 0); 4475 pmc_intr = NULL; 4476 4477 sx_xlock(&pmc_sx); 4478 if (pmc_hook == NULL) { /* being unloaded already */ 4479 sx_xunlock(&pmc_sx); 4480 return; 4481 } 4482 4483 pmc_hook = NULL; /* prevent new threads from entering module */ 4484 4485 /* deregister event handlers */ 4486 EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); 4487 EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); 4488 4489 /* send SIGBUS to all owner threads, free up allocations */ 4490 if (pmc_ownerhash) 4491 for (ph = pmc_ownerhash; 4492 ph <= &pmc_ownerhash[pmc_ownerhashmask]; 4493 ph++) { 4494 LIST_FOREACH_SAFE(po, ph, po_next, tmp) { 4495 pmc_remove_owner(po); 4496 4497 /* send SIGBUS to owner processes */ 4498 PMCDBG(MOD,INI,2, "cleanup signal proc=%p " 4499 "(%d, %s)", po->po_owner, 4500 po->po_owner->p_pid, 4501 po->po_owner->p_comm); 4502 4503 PROC_LOCK(po->po_owner); 4504 psignal(po->po_owner, SIGBUS); 4505 PROC_UNLOCK(po->po_owner); 4506 4507 pmc_destroy_owner_descriptor(po); 4508 } 4509 } 4510 4511 /* reclaim allocated data structures */ 4512 if (pmc_mtxpool) 4513 mtx_pool_destroy(&pmc_mtxpool); 4514 4515 mtx_destroy(&pmc_processhash_mtx); 4516 if (pmc_processhash) { 4517 #ifdef DEBUG 4518 struct pmc_process *pp; 4519 4520 PMCDBG(MOD,INI,3, "%s", "destroy process hash"); 4521 for (prh = pmc_processhash; 4522 prh <= &pmc_processhash[pmc_processhashmask]; 4523 prh++) 4524 LIST_FOREACH(pp, prh, pp_next) 4525 PMCDBG(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); 4526 #endif 4527 4528 hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); 4529 pmc_processhash = NULL; 4530 } 4531 4532 if (pmc_ownerhash) { 4533 PMCDBG(MOD,INI,3, "%s", "destroy owner hash"); 4534 hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); 4535 pmc_ownerhash = NULL; 4536 } 4537 4538 KASSERT(LIST_EMPTY(&pmc_ss_owners), 4539 ("[pmc,%d] Global SS owner list not empty", __LINE__)); 4540 KASSERT(pmc_ss_count == 0, 4541 ("[pmc,%d] Global SS count not empty", __LINE__)); 4542 4543 /* free the per-cpu sample buffers */ 4544 for (cpu = 0; cpu < mp_ncpus; cpu++) { 4545 if (pmc_cpu_is_disabled(cpu)) 4546 continue; 4547 KASSERT(pmc_pcpu[cpu]->pc_sb != NULL, 4548 ("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__, 4549 cpu)); 4550 FREE(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC); 4551 FREE(pmc_pcpu[cpu]->pc_sb, M_PMC); 4552 pmc_pcpu[cpu]->pc_sb = NULL; 4553 } 4554 4555 /* do processor dependent cleanup */ 4556 PMCDBG(MOD,INI,3, "%s", "md cleanup"); 4557 if (md) { 4558 pmc_save_cpu_binding(&pb); 4559 for (cpu = 0; cpu < mp_ncpus; cpu++) { 4560 PMCDBG(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", 4561 cpu, pmc_pcpu[cpu]); 4562 if (pmc_cpu_is_disabled(cpu)) 4563 continue; 4564 pmc_select_cpu(cpu); 4565 if (pmc_pcpu[cpu]) 4566 (void) md->pmd_cleanup(cpu); 4567 } 4568 FREE(md, M_PMC); 4569 md = NULL; 4570 pmc_restore_cpu_binding(&pb); 4571 } 4572 4573 /* deallocate per-cpu structures */ 4574 FREE(pmc_pcpu, M_PMC); 4575 pmc_pcpu = NULL; 4576 4577 FREE(pmc_pcpu_saved, M_PMC); 4578 pmc_pcpu_saved = NULL; 4579 4580 if (pmc_pmcdisp) { 4581 FREE(pmc_pmcdisp, M_PMC); 4582 pmc_pmcdisp = NULL; 4583 } 4584 4585 pmclog_shutdown(); 4586 4587 sx_xunlock(&pmc_sx); /* we are done */ 4588 } 4589 4590 /* 4591 * The function called at load/unload. 4592 */ 4593 4594 static int 4595 load (struct module *module __unused, int cmd, void *arg __unused) 4596 { 4597 int error; 4598 4599 error = 0; 4600 4601 switch (cmd) { 4602 case MOD_LOAD : 4603 /* initialize the subsystem */ 4604 error = pmc_initialize(); 4605 if (error != 0) 4606 break; 4607 PMCDBG(MOD,INI,1, "syscall=%d ncpus=%d", 4608 pmc_syscall_num, mp_ncpus); 4609 break; 4610 4611 4612 case MOD_UNLOAD : 4613 case MOD_SHUTDOWN: 4614 pmc_cleanup(); 4615 PMCDBG(MOD,INI,1, "%s", "unloaded"); 4616 break; 4617 4618 default : 4619 error = EINVAL; /* XXX should panic(9) */ 4620 break; 4621 } 4622 4623 return error; 4624 } 4625 4626 /* memory pool */ 4627 MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module"); 4628