/*-
 * Copyright (c) 2003-2008 Joseph Koshy
 * Copyright (c) 2007 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/pmclog.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <sys/linker.h>		/* needs to be after <sys/malloc.h> */

#include <machine/atomic.h>
#include <machine/md_var.h>

/*
 * Types
 */

enum pmc_flags {
	PMC_FLAG_NONE	  = 0x00, /* do nothing */
	PMC_FLAG_REMOVE   = 0x01, /* atomically remove entry from hash */
	PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */
};

/*
 * The offset in sysent where the syscall is allocated.
 */
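/*
 * (Initializing this to NO_SYSCALL asks the syscall module machinery to
 * pick a free sysent slot dynamically at load time; the number chosen
 * is written back through the &pmc_syscall_num field of the
 * pmc_syscall_mod structure defined later in this file.  This is a
 * sketch of the standard dynamic-syscall idiom, not a statement about
 * every FreeBSD version.)
 */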
static int pmc_syscall_num = NO_SYSCALL;
struct pmc_cpu		**pmc_pcpu;	 /* per-cpu state */
pmc_value_t		*pmc_pcpu_saved; /* saved PMC values: CSW handling */

#define	PMC_PCPU_SAVED(C,R)	pmc_pcpu_saved[(R) + md->pmd_npmc*(C)]

struct mtx_pool		*pmc_mtxpool;
static int		*pmc_pmcdisp;	 /* PMC row dispositions */

#define	PMC_ROW_DISP_IS_FREE(R)		(pmc_pmcdisp[(R)] == 0)
#define	PMC_ROW_DISP_IS_THREAD(R)	(pmc_pmcdisp[(R)] > 0)
#define	PMC_ROW_DISP_IS_STANDALONE(R)	(pmc_pmcdisp[(R)] < 0)

#define	PMC_MARK_ROW_FREE(R) do {					  \
	pmc_pmcdisp[(R)] = 0;						  \
} while (0)

#define	PMC_MARK_ROW_STANDALONE(R) do {					  \
	KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
	    __LINE__));							  \
	atomic_add_int(&pmc_pmcdisp[(R)], -1);				  \
	KASSERT(pmc_pmcdisp[(R)] >= (-pmc_cpu_max_active()),		  \
	    ("[pmc,%d] row disposition error", __LINE__));		  \
} while (0)

#define	PMC_UNMARK_ROW_STANDALONE(R) do {				  \
	atomic_add_int(&pmc_pmcdisp[(R)], 1);				  \
	KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
	    __LINE__));							  \
} while (0)

#define	PMC_MARK_ROW_THREAD(R) do {					  \
	KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
	    __LINE__));							  \
	atomic_add_int(&pmc_pmcdisp[(R)], 1);				  \
} while (0)

#define	PMC_UNMARK_ROW_THREAD(R) do {					  \
	atomic_add_int(&pmc_pmcdisp[(R)], -1);				  \
	KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
	    __LINE__));							  \
} while (0)


/* various event handlers */
static eventhandler_tag	pmc_exit_tag, pmc_fork_tag;

/* Module statistics */
struct pmc_op_getdriverstats pmc_stats;

/* Machine/processor dependent operations */
struct pmc_mdep  *md;

/*
 * Hash tables mapping owner processes and target threads to PMCs.
 */

struct mtx pmc_processhash_mtx;		/* spin mutex */
static u_long pmc_processhashmask;
static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash;

/*
 * Hash table of PMC owner descriptors.  This table is protected by
 * the shared PMC "sx" lock.
 */

static u_long pmc_ownerhashmask;
static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash;

/*
 * List of PMC owners with system-wide sampling PMCs.
 */
static LIST_HEAD(, pmc_owner) pmc_ss_owners;


/*
 * Prototypes
 */

#ifdef	DEBUG
static int	pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS);
static int	pmc_debugflags_parse(char *newstr, char *fence);
#endif

static int	load(struct module *module, int cmd, void *arg);
static int	pmc_attach_process(struct proc *p, struct pmc *pm);
static struct pmc *pmc_allocate_pmc_descriptor(void);
static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p);
static int	pmc_attach_one_process(struct proc *p, struct pmc *pm);
static int	pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
    int cpu);
static int	pmc_can_attach(struct pmc *pm, struct proc *p);
static void	pmc_capture_user_callchain(int cpu, struct trapframe *tf);
static void	pmc_cleanup(void);
static int	pmc_detach_process(struct proc *p, struct pmc *pm);
static int	pmc_detach_one_process(struct proc *p, struct pmc *pm,
    int flags);
static void	pmc_destroy_owner_descriptor(struct pmc_owner *po);
static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
static int	pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
    pmc_id_t pmc);
static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
    uint32_t mode);
static void	pmc_force_context_switch(void);
static void	pmc_link_target_process(struct pmc *pm,
    struct pmc_process *pp);
static void	pmc_log_all_process_mappings(struct pmc_owner *po);
static void	pmc_log_kernel_mappings(struct pmc *pm);
static void	pmc_log_process_mappings(struct pmc_owner *po, struct proc *p);
static void	pmc_maybe_remove_owner(struct pmc_owner *po);
static void	pmc_process_csw_in(struct thread *td);
static void	pmc_process_csw_out(struct thread *td);
static void	pmc_process_exit(void *arg, struct proc *p);
static void	pmc_process_fork(void *arg, struct proc *p1,
    struct proc *p2, int n);
static void	pmc_process_samples(int cpu);
static void	pmc_release_pmc_descriptor(struct pmc *pmc);
static void	pmc_remove_owner(struct pmc_owner *po);
static void	pmc_remove_process_descriptor(struct pmc_process *pp);
static void	pmc_restore_cpu_binding(struct pmc_binding *pb);
static void	pmc_save_cpu_binding(struct pmc_binding *pb);
static void	pmc_select_cpu(int cpu);
static int	pmc_start(struct pmc *pm);
static int	pmc_stop(struct pmc *pm);
static int	pmc_syscall_handler(struct thread *td, void *syscall_args);
static void	pmc_unlink_target_process(struct pmc *pmc,
    struct pmc_process *pp);

/*
 * Kernel tunables and sysctl(8) interface.
 */
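/*
 * (Most of the tunables below are read-only at run time (CTLFLAG_RD),
 * so they are normally set at boot via loader.conf(5).  Assuming the
 * usual "kern.hwpmc." expansion of PMC_SYSCTL_NAME_PREFIX, an example
 * would be:
 *
 *	kern.hwpmc.hashsize=512
 *	kern.hwpmc.nsamples=256
 * )
 */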
SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");

static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_callchaindepth, 0, "depth of call chain records");

#ifdef	DEBUG
struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char	pmc_debugstr[PMC_DEBUG_STRSIZE];
TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr,
    sizeof(pmc_debugstr));
SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
    CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_TUN,
    0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
#endif

/*
 * kern.hwpmc.hashsize -- determines the number of rows in the hash
 * tables used to look up target processes and owners.
 */

static int pmc_hashsize = PMC_HASH_SIZE;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "hashsize", &pmc_hashsize);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_hashsize, 0, "rows in hash tables");

/*
 * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU
 */

static int pmc_nsamples = PMC_NSAMPLES;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_nsamples, 0, "number of PC samples per CPU");


/*
 * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
 */

static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "mtxpoolsize", &pmc_mtxpool_size);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_mtxpool_size, 0, "size of spin mutex pool");


/*
 * security.bsd.unprivileged_syspmcs -- allow non-root processes to
 * allocate system-wide PMCs.
 *
 * Allowing unprivileged processes to allocate system PMCs is convenient
 * if system-wide measurements need to be taken concurrently with other
 * per-process measurements.  This feature is turned off by default.
 */

static int pmc_unprivileged_syspmcs = 0;
TUNABLE_INT("security.bsd.unprivileged_syspmcs", &pmc_unprivileged_syspmcs);
SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RW,
    &pmc_unprivileged_syspmcs, 0,
    "allow unprivileged process to allocate system PMCs");

/*
 * Hash function.  Discard the lower 2 bits of the pointer since
 * these are always zero for our uses.  The hash multiplier is
 * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
 */
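/*
 * (Worked example of the constant above, for LONG_BIT == 32:
 * (sqrt(5)-1)/2 ~= 0.6180339887, and 2^32 * 0.6180339887 ~=
 * 2654435769.5, which rounds to the 32-bit value below.  This is
 * Knuth's multiplicative ("Fibonacci") hashing constant; the 64-bit
 * value is derived the same way from 2^64.)
 */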
#if	LONG_BIT == 64
#define	_PMC_HM		11400714819323198486u
#elif	LONG_BIT == 32
#define	_PMC_HM		2654435769u
#else
#error	Must know the size of 'long' to compile
#endif

#define	PMC_HASH_PTR(P,M)	((((unsigned long) (P) >> 2) * _PMC_HM) & (M))

/*
 * Syscall structures
 */

/* The `sysent' for the new syscall */
static struct sysent pmc_sysent = {
	2,			/* sy_narg */
	pmc_syscall_handler	/* sy_call */
};

static struct syscall_module_data pmc_syscall_mod = {
	load,
	NULL,
	&pmc_syscall_num,
	&pmc_sysent,
	{ 0, NULL }
};

static moduledata_t pmc_mod = {
	PMC_MODULE_NAME,
	syscall_module_handler,
	&pmc_syscall_mod
};

DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_VERSION(pmc, PMC_VERSION);

#ifdef	DEBUG
enum pmc_dbgparse_state {
	PMCDS_WS,		/* in whitespace */
	PMCDS_MAJOR,		/* seen a major keyword */
	PMCDS_MINOR
};

static int
pmc_debugflags_parse(char *newstr, char *fence)
{
	char c, *p, *q;
	struct pmc_debugflags *tmpflags;
	int error, found, *newbits, tmp;
	size_t kwlen;

	tmpflags = malloc(sizeof(*tmpflags), M_PMC, M_WAITOK|M_ZERO);

	p = newstr;
	error = 0;

	for (; p < fence && (c = *p); p++) {

		/* skip white space */
		if (c == ' ' || c == '\t')
			continue;

		/* look for a keyword followed by "=" */
		for (q = p; p < fence && (c = *p) && c != '='; p++)
			;
		if (c != '=') {
			error = EINVAL;
			goto done;
		}

		kwlen = p - q;
		newbits = NULL;

		/* lookup flag group name */
#define	DBG_SET_FLAG_MAJ(S,F)						\
		if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0)	\
			newbits = &tmpflags->pdb_ ## F;

		DBG_SET_FLAG_MAJ("cpu",		CPU);
		DBG_SET_FLAG_MAJ("csw",		CSW);
		DBG_SET_FLAG_MAJ("logging",	LOG);
		DBG_SET_FLAG_MAJ("module",	MOD);
		DBG_SET_FLAG_MAJ("md",		MDP);
		DBG_SET_FLAG_MAJ("owner",	OWN);
		DBG_SET_FLAG_MAJ("pmc",		PMC);
		DBG_SET_FLAG_MAJ("process",	PRC);
		DBG_SET_FLAG_MAJ("sampling",	SAM);

		if (newbits == NULL) {
			error = EINVAL;
			goto done;
		}

		p++;		/* skip the '=' */

		/* Now parse the individual flags */
		tmp = 0;
	newflag:
		for (q = p; p < fence && (c = *p); p++)
			if (c == ' ' || c == '\t' || c == ',')
				break;

		/* p == fence or c == ws or c == "," or c == 0 */

		if ((kwlen = p - q) == 0) {
			*newbits = tmp;
			continue;
		}

		found = 0;
#define	DBG_SET_FLAG_MIN(S,F)						\
		if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0)	\
			tmp |= found = (1 << PMC_DEBUG_MIN_ ## F)

		/* a '*' denotes all possible flags in the group */
		if (kwlen == 1 && *q == '*')
			tmp = found = ~0;
		/* look for individual flag names */
		DBG_SET_FLAG_MIN("allocaterow", ALR);
		DBG_SET_FLAG_MIN("allocate",	ALL);
		DBG_SET_FLAG_MIN("attach",	ATT);
		DBG_SET_FLAG_MIN("bind",	BND);
		DBG_SET_FLAG_MIN("config",	CFG);
		DBG_SET_FLAG_MIN("exec",	EXC);
		DBG_SET_FLAG_MIN("exit",	EXT);
		DBG_SET_FLAG_MIN("find",	FND);
		DBG_SET_FLAG_MIN("flush",	FLS);
		DBG_SET_FLAG_MIN("fork",	FRK);
		DBG_SET_FLAG_MIN("getbuf",	GTB);
		DBG_SET_FLAG_MIN("hook",	PMH);
		DBG_SET_FLAG_MIN("init",	INI);
		DBG_SET_FLAG_MIN("intr",	INT);
		DBG_SET_FLAG_MIN("linktarget",	TLK);
		DBG_SET_FLAG_MIN("mayberemove", OMR);
		DBG_SET_FLAG_MIN("ops",		OPS);
		DBG_SET_FLAG_MIN("read",	REA);
		DBG_SET_FLAG_MIN("register",	REG);
		DBG_SET_FLAG_MIN("release",	REL);
		DBG_SET_FLAG_MIN("remove",	ORM);
		DBG_SET_FLAG_MIN("sample",	SAM);
		DBG_SET_FLAG_MIN("scheduleio",	SIO);
		DBG_SET_FLAG_MIN("select",	SEL);
		DBG_SET_FLAG_MIN("signal",	SIG);
		DBG_SET_FLAG_MIN("swi",		SWI);
		DBG_SET_FLAG_MIN("swo",		SWO);
		DBG_SET_FLAG_MIN("start",	STA);
		DBG_SET_FLAG_MIN("stop",	STO);
		DBG_SET_FLAG_MIN("syscall",	PMS);
		DBG_SET_FLAG_MIN("unlinktarget", TUL);
		DBG_SET_FLAG_MIN("write",	WRI);
		if (found == 0) {
			/* unrecognized flag name */
			error = EINVAL;
			goto done;
		}

		if (c == 0 || c == ' ' || c == '\t') {	/* end of flag group */
			*newbits = tmp;
			continue;
		}

		p++;
		goto newflag;
	}

	/* save the new flag set */
	bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags));

 done:
	free(tmpflags, M_PMC);
	return error;
}

static int
pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	char *fence, *newstr;
	int error;
	unsigned int n;

	(void) arg1; (void) arg2; /* unused parameters */

	n = sizeof(pmc_debugstr);
	newstr = malloc(n, M_PMC, M_ZERO|M_WAITOK);
	(void) strlcpy(newstr, pmc_debugstr, n);

	error = sysctl_handle_string(oidp, newstr, n, req);

	/* if there is a new string, parse and copy it */
	if (error == 0 && req->newptr != NULL) {
		fence = newstr + (n < req->newlen ? n : req->newlen + 1);
		if ((error = pmc_debugflags_parse(newstr, fence)) == 0)
			(void) strlcpy(pmc_debugstr, newstr,
			    sizeof(pmc_debugstr));
	}

	free(newstr, M_PMC);

	return error;
}
#endif
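/*
 * (Sketch of the flag string syntax accepted by pmc_debugflags_parse()
 * above: whitespace-separated "major=minor[,minor...]" groups, where
 * '*' selects every minor flag in a group.  Assuming the standard
 * "kern.hwpmc." prefix, an example setting would be:
 *
 *	# sysctl kern.hwpmc.debugflags="cpu=* process=attach,exit"
 * )
 */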
/*
 * Concurrency Control
 *
 * The driver manages the following data structures:
 *
 *   - target process descriptors, one per target process
 *   - owner process descriptors (and attached lists), one per owner process
 *   - lookup hash tables for owner and target processes
 *   - PMC descriptors (and attached lists)
 *   - per-cpu hardware state
 *   - the 'hook' variable through which the kernel calls into
 *     this module
 *   - the machine hardware state (managed by the MD layer)
 *
 * These data structures are accessed from:
 *
 *   - thread context-switch code
 *   - interrupt handlers (possibly on multiple cpus)
 *   - kernel threads on multiple cpus running on behalf of user
 *     processes doing system calls
 *   - this driver's private kernel threads
 *
 * = Locks and Locking strategy =
 *
 * The driver uses four locking strategies for its operation:
 *
 * - The global SX lock "pmc_sx" is used to protect internal
 *   data structures.
 *
 *   Calls into the module by syscall() start with this lock being
 *   held in exclusive mode.  Depending on the requested operation,
 *   the lock may be downgraded to 'shared' mode to allow more
 *   concurrent readers into the module.  Calls into the module from
 *   other parts of the kernel acquire the lock in shared mode.
 *
 *   This SX lock is held in exclusive mode for any operations that
 *   modify the linkages between the driver's internal data structures.
 *
 *   The 'pmc_hook' function pointer is also protected by this lock.
 *   It is only examined with the sx lock held in exclusive mode.  The
 *   kernel module is allowed to be unloaded only with the sx lock held
 *   in exclusive mode.  In normal syscall handling, after acquiring the
 *   pmc_sx lock we first check that 'pmc_hook' is non-null before
 *   proceeding.  This prevents races between the thread unloading the
 *   module and other threads seeking to use the module.
 *
 * - Lookups of target process structures and owner process structures
 *   cannot use the global "pmc_sx" SX lock because these lookups need
 *   to happen during context switches and in other critical sections
 *   where sleeping is not allowed.  We protect these lookup tables
 *   with their own private spin-mutexes, "pmc_processhash_mtx" and
 *   "pmc_ownerhash_mtx".
 *
 * - Interrupt handlers work in a lock-free manner.  At interrupt
 *   time, handlers look at the PMC pointer (phw->phw_pmc) configured
 *   when the PMC was started.  If this pointer is NULL, the interrupt
 *   is ignored after updating driver statistics.  We ensure that this
 *   pointer is set (using an atomic operation if necessary) before the
 *   PMC hardware is started.  Conversely, this pointer is unset atomically
 *   only after the PMC hardware is stopped.
 *
 *   We ensure that everything needed for the operation of an
 *   interrupt handler is available without it needing to acquire any
 *   locks.  We also ensure that a PMC's software state is destroyed only
 *   after the PMC is taken off hardware (on all CPUs).
 *
 * - Context-switch handling with process-private PMCs needs more
 *   care.
 *
 *   A given process may be the target of multiple PMCs.  For example,
 *   PMCATTACH and PMCDETACH may be requested by a process on one CPU
 *   while the target process is running on another.  A PMC could also
 *   be getting released because its owner is exiting.  We tackle
 *   these situations in the following manner:
 *
 *   - each target process structure 'pmc_process' has an array
 *     of 'struct pmc *' pointers, one for each hardware PMC.
 *
 *   - At context switch IN time, each "target" PMC in RUNNING state
 *     gets started on hardware and a pointer to each PMC is copied into
 *     the per-cpu phw array.  The 'runcount' for the PMC is
 *     incremented.
 *
 *   - At context switch OUT time, all process-virtual PMCs are stopped
 *     on hardware.  The saved value is added to the PMC's value field
 *     only if the PMC is in a non-deleted state (the PMC's state could
 *     have changed during the current time slice).
 *
 *     Note that in between a switch IN on a processor and the matching
 *     switch OUT, the PMC could have been released on another CPU.
 *     Therefore context switch OUT always looks at the hardware state
 *     to turn OFF PMCs and will update a PMC's saved value only if it
 *     is reachable from the target process record.
 *
 *   - OP PMCRELEASE could be called on a PMC at any time (the PMC could
 *     be attached to many processes at the time of the call and could
 *     be active on multiple CPUs).
 *
 *     We prevent further scheduling of the PMC by marking it as in
 *     state 'DELETED'.  If the runcount of the PMC is non-zero then
 *     this PMC is currently running on a CPU somewhere.  The thread
 *     doing the PMCRELEASE operation waits by repeatedly doing a
 *     pause() till the runcount comes to zero.
 *
 * The contents of a PMC descriptor (struct pmc) are protected using
 * a spin-mutex.  In order to save space, we use a mutex pool.
 *
 * In terms of lock types used by witness(4), we use:
 * - Type "pmc-sx", used by the global SX lock.
 * - Type "pmc-sleep", for sleep mutexes used by logger threads.
 * - Type "pmc-per-proc", for protecting PMC owner descriptors.
 * - Type "pmc-leaf", used for all other spin mutexes.
 */
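/*
 * The three CPU binding helpers that follow are used together in a
 * save/bind/restore pattern, as in pmc_release_pmc_descriptor()
 * later in this file:
 *
 *	struct pmc_binding pb;
 *
 *	pmc_save_cpu_binding(&pb);
 *	pmc_select_cpu(cpu);
 *	... operate on per-CPU PMC state ...
 *	pmc_restore_cpu_binding(&pb);
 */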
/*
 * save the cpu binding of the current kthread
 */

static void
pmc_save_cpu_binding(struct pmc_binding *pb)
{
	PMCDBG(CPU,BND,2, "%s", "save-cpu");
	thread_lock(curthread);
	pb->pb_bound = sched_is_bound(curthread);
	pb->pb_cpu   = curthread->td_oncpu;
	thread_unlock(curthread);
	PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu);
}

/*
 * restore the cpu binding of the current thread
 */

static void
pmc_restore_cpu_binding(struct pmc_binding *pb)
{
	PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d",
	    curthread->td_oncpu, pb->pb_cpu);
	thread_lock(curthread);
	if (pb->pb_bound)
		sched_bind(curthread, pb->pb_cpu);
	else
		sched_unbind(curthread);
	thread_unlock(curthread);
	PMCDBG(CPU,BND,2, "%s", "restore-cpu done");
}

/*
 * move execution over to the specified cpu and bind it there.
 */

static void
pmc_select_cpu(int cpu)
{
	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[pmc,%d] bad cpu number %d", __LINE__, cpu));

	/* Never move to an inactive CPU. */
	KASSERT(pmc_cpu_is_active(cpu), ("[pmc,%d] selecting inactive "
	    "CPU %d", __LINE__, cpu));

	PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu);
	thread_lock(curthread);
	sched_bind(curthread, cpu);
	thread_unlock(curthread);

	KASSERT(curthread->td_oncpu == cpu,
	    ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__,
		cpu, curthread->td_oncpu));

	PMCDBG(CPU,SEL,2, "select-cpu cpu=%d ok", cpu);
}

/*
 * Force a context switch.
 *
 * We do this by pause'ing for 1 tick -- invoking mi_switch() is not
 * guaranteed to force a context switch.
 */

static void
pmc_force_context_switch(void)
{

	pause("pmcctx", 1);
}

/*
 * Get the file name for an executable.  This is a simple wrapper
 * around vn_fullpath(9).
 */

static void
pmc_getfilename(struct vnode *v, char **fullpath, char **freepath)
{

	*fullpath = "unknown";
	*freepath = NULL;
	vn_lock(v, LK_CANRECURSE | LK_EXCLUSIVE | LK_RETRY);
	vn_fullpath(curthread, v, fullpath, freepath);
	VOP_UNLOCK(v, 0);
}

/*
 * remove a process owning PMCs
 */

void
pmc_remove_owner(struct pmc_owner *po)
{
	struct pmc *pm, *tmp;

	sx_assert(&pmc_sx, SX_XLOCKED);

	PMCDBG(OWN,ORM,1, "remove-owner po=%p", po);

	/* Remove descriptor from the owner hash table */
	LIST_REMOVE(po, po_next);

	/* release all owned PMC descriptors */
	LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) {
		PMCDBG(OWN,ORM,2, "pmc=%p", pm);
		KASSERT(pm->pm_owner == po,
		    ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po));

		pmc_release_pmc_descriptor(pm);	/* will unlink from the list */
	}

	KASSERT(po->po_sscount == 0,
	    ("[pmc,%d] SS count not zero", __LINE__));
	KASSERT(LIST_EMPTY(&po->po_pmcs),
	    ("[pmc,%d] PMC list not empty", __LINE__));

	/* de-configure the log file if present */
	if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		pmclog_deconfigure_log(po);
}
/*
 * remove an owner process record if all conditions are met.
 */

static void
pmc_maybe_remove_owner(struct pmc_owner *po)
{

	PMCDBG(OWN,OMR,1, "maybe-remove-owner po=%p", po);

	/*
	 * Remove owner record if
	 * - this process does not own any PMCs
	 * - this process has not allocated a system-wide sampling buffer
	 */

	if (LIST_EMPTY(&po->po_pmcs) &&
	    ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
		pmc_remove_owner(po);
		pmc_destroy_owner_descriptor(po);
	}
}

/*
 * Add an association between a target process and a PMC.
 */

static void
pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
{
	int ri;
	struct pmc_target *pt;

	sx_assert(&pmc_sx, SX_XLOCKED);

	KASSERT(pm != NULL && pp != NULL,
	    ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
	KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
	    ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d",
		__LINE__, pm, pp->pp_proc->p_pid));
	KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < ((int) md->pmd_npmc - 1),
	    ("[pmc,%d] Illegal reference count %d for process record %p",
		__LINE__, pp->pp_refcnt, (void *) pp));

	ri = PMC_TO_ROWINDEX(pm);

	PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p",
	    pm, ri, pp);

#ifdef	DEBUG
	LIST_FOREACH(pt, &pm->pm_targets, pt_next)
	    if (pt->pt_process == pp)
		    KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets",
			__LINE__, pp, pm));
#endif

	pt = malloc(sizeof(struct pmc_target), M_PMC, M_ZERO|M_WAITOK);

	pt->pt_process = pp;

	LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next);

	atomic_store_rel_ptr((uintptr_t *)&pp->pp_pmcs[ri].pp_pmc,
	    (uintptr_t)pm);

	if (pm->pm_owner->po_owner == pp->pp_proc)
		pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;

	/*
	 * Initialize the per-process values at this row index.
	 */
	pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ?
	    pm->pm_sc.pm_reloadcount : 0;

	pp->pp_refcnt++;

}
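/*
 * (Note on the initialization above: for a sampling-mode (TS) PMC,
 * pp_pmcval tracks the number of events remaining until the next
 * sampling interrupt, so it starts at the reload count and is expected
 * to decrease; for counting-mode PMCs it starts at zero and
 * accumulates.  See the context switch OUT handling below.)
 */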
/*
 * Removes the association between a target process and a PMC.
 */

static void
pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
{
	int ri;
	struct proc *p;
	struct pmc_target *ptgt;

	sx_assert(&pmc_sx, SX_XLOCKED);

	KASSERT(pm != NULL && pp != NULL,
	    ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));

	KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt < (int) md->pmd_npmc,
	    ("[pmc,%d] Illegal ref count %d on process record %p",
		__LINE__, pp->pp_refcnt, (void *) pp));

	ri = PMC_TO_ROWINDEX(pm);

	PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p",
	    pm, ri, pp);

	KASSERT(pp->pp_pmcs[ri].pp_pmc == pm,
	    ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__,
		ri, pm, pp->pp_pmcs[ri].pp_pmc));

	pp->pp_pmcs[ri].pp_pmc = NULL;
	pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;

	/* Remove owner-specific flags */
	if (pm->pm_owner->po_owner == pp->pp_proc) {
		pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
		pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER;
	}

	pp->pp_refcnt--;

	/* Remove the target process from the PMC structure */
	LIST_FOREACH(ptgt, &pm->pm_targets, pt_next)
		if (ptgt->pt_process == pp)
			break;

	KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found "
	    "in pmc %p", __LINE__, pp->pp_proc, pp, pm));

	LIST_REMOVE(ptgt, pt_next);
	free(ptgt, M_PMC);

	/* if the PMC now lacks targets, send the owner a SIGIO */
	if (LIST_EMPTY(&pm->pm_targets)) {
		p = pm->pm_owner->po_owner;
		PROC_LOCK(p);
		psignal(p, SIGIO);
		PROC_UNLOCK(p);

		PMCDBG(PRC,SIG,2, "signalling proc=%p signal=%d", p,
		    SIGIO);
	}
}

/*
 * Check if PMC 'pm' may be attached to target process 't'.
 */

static int
pmc_can_attach(struct pmc *pm, struct proc *t)
{
	struct proc *o;		/* pmc owner */
	struct ucred *oc, *tc;	/* owner, target credentials */
	int decline_attach, i;

	/*
	 * A PMC's owner can always attach that PMC to itself.
	 */

	if ((o = pm->pm_owner->po_owner) == t)
		return 0;

	PROC_LOCK(o);
	oc = o->p_ucred;
	crhold(oc);
	PROC_UNLOCK(o);

	PROC_LOCK(t);
	tc = t->p_ucred;
	crhold(tc);
	PROC_UNLOCK(t);

	/*
	 * The effective uid of the PMC owner should match at least one
	 * of the {effective,real,saved} uids of the target process.
	 */

	decline_attach = oc->cr_uid != tc->cr_uid &&
	    oc->cr_uid != tc->cr_svuid &&
	    oc->cr_uid != tc->cr_ruid;

	/*
	 * Every one of the target's group ids must be in the owner's
	 * group list.
	 */
	for (i = 0; !decline_attach && i < tc->cr_ngroups; i++)
		decline_attach = !groupmember(tc->cr_groups[i], oc);

	/* check the real and saved gids too */
	if (decline_attach == 0)
		decline_attach = !groupmember(tc->cr_rgid, oc) ||
		    !groupmember(tc->cr_svgid, oc);

	crfree(tc);
	crfree(oc);

	return !decline_attach;
}
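/*
 * (Illustration of the policy implemented by pmc_can_attach() above:
 * an owner running with effective uid 1001 may attach to a target
 * whose real, effective or saved uid is 1001, but not to, say, a
 * setuid-root process whose credentials no longer match; the target's
 * group ids must likewise all be present in the owner's group list.)
 */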
/*
 * Attach a process to a PMC.
 */

static int
pmc_attach_one_process(struct proc *p, struct pmc *pm)
{
	int ri;
	char *fullpath, *freepath;
	struct pmc_process	*pp;

	sx_assert(&pmc_sx, SX_XLOCKED);

	PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm,
	    PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);

	/*
	 * Locate the process descriptor corresponding to process 'p',
	 * allocating space as needed.
	 *
	 * Verify that rowindex 'pm_rowindex' is free in the process
	 * descriptor.
	 *
	 * If not, allocate space for a descriptor and link the
	 * process descriptor and PMC.
	 */
	ri = PMC_TO_ROWINDEX(pm);

	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
		return ENOMEM;

	if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */
		return EEXIST;

	if (pp->pp_pmcs[ri].pp_pmc != NULL)
		return EBUSY;

	pmc_link_target_process(pm, pp);

	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) &&
	    (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0)
		pm->pm_flags |= PMC_F_NEEDS_LOGFILE;

	pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */

	/* issue an attach event to a configured log file */
	if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) {
		pmc_getfilename(p->p_textvp, &fullpath, &freepath);
		if (p->p_flag & P_KTHREAD) {
			fullpath = kernelname;
			freepath = NULL;
		} else
			pmclog_process_pmcattach(pm, p->p_pid, fullpath);
		if (freepath)
			free(freepath, M_TEMP);
		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			pmc_log_process_mappings(pm->pm_owner, p);
	}
	/* mark process as using HWPMCs */
	PROC_LOCK(p);
	p->p_flag |= P_HWPMC;
	PROC_UNLOCK(p);

	return 0;
}

/*
 * Attach a process and optionally its children
 */

static int
pmc_attach_process(struct proc *p, struct pmc *pm)
{
	int error;
	struct proc *top;

	sx_assert(&pmc_sx, SX_XLOCKED);

	PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm,
	    PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);


	/*
	 * If this PMC successfully allowed a GETMSR operation
	 * in the past, disallow further ATTACHes.
	 */

	if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0)
		return EPERM;

	if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
		return pmc_attach_one_process(p, pm);

	/*
	 * Traverse all child processes, attaching them to
	 * this PMC.
	 */

	sx_slock(&proctree_lock);

	top = p;

	for (;;) {
		if ((error = pmc_attach_one_process(p, pm)) != 0)
			break;
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				goto done;
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}

	if (error)
		(void) pmc_detach_process(top, pm);

 done:
	sx_sunlock(&proctree_lock);
	return error;
}
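/*
 * (The loop above walks the process tree rooted at 'top' iteratively
 * in preorder: descend to the first child if one exists, otherwise
 * advance to a sibling, climbing back up via p_pptr until a sibling
 * is found or the walk returns to 'top'.  The same traversal pattern
 * is used by pmc_detach_process() and pmc_log_all_process_mappings()
 * below.)
 */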
/*
 * Detach a process from a PMC.  If there are no other PMCs tracking
 * this process, remove the process structure from its hash table.  If
 * 'flags' contains PMC_FLAG_REMOVE, then free the process structure.
 */

static int
pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
{
	int ri;
	struct pmc_process *pp;

	sx_assert(&pmc_sx, SX_XLOCKED);

	KASSERT(pm != NULL,
	    ("[pmc,%d] null pm pointer", __LINE__));

	ri = PMC_TO_ROWINDEX(pm);

	PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
	    pm, ri, p, p->p_pid, p->p_comm, flags);

	if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
		return ESRCH;

	if (pp->pp_pmcs[ri].pp_pmc != pm)
		return EINVAL;

	pmc_unlink_target_process(pm, pp);

	/* Issue a detach entry if a log file is configured */
	if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE)
		pmclog_process_pmcdetach(pm, p->p_pid);

	/*
	 * If there are no PMCs targeting this process, we remove its
	 * descriptor from the target hash table and unset the P_HWPMC
	 * flag in the struct proc.
	 */
	KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < (int) md->pmd_npmc,
	    ("[pmc,%d] Illegal refcnt %d for process struct %p",
		__LINE__, pp->pp_refcnt, pp));

	if (pp->pp_refcnt != 0)	/* still a target of some PMC */
		return 0;

	pmc_remove_process_descriptor(pp);

	if (flags & PMC_FLAG_REMOVE)
		free(pp, M_PMC);

	PROC_LOCK(p);
	p->p_flag &= ~P_HWPMC;
	PROC_UNLOCK(p);

	return 0;
}
/*
 * Detach a process and optionally its descendants from a PMC.
 */

static int
pmc_detach_process(struct proc *p, struct pmc *pm)
{
	struct proc *top;

	sx_assert(&pmc_sx, SX_XLOCKED);

	PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm,
	    PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);

	if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
		return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);

	/*
	 * Traverse all children, detaching them from this PMC.  We
	 * ignore errors since we could be detaching a PMC from a
	 * partially attached proc tree.
	 */

	sx_slock(&proctree_lock);

	top = p;

	for (;;) {
		(void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);

		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				goto done;
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}

 done:
	sx_sunlock(&proctree_lock);

	if (LIST_EMPTY(&pm->pm_targets))
		pm->pm_flags &= ~PMC_F_ATTACH_DONE;

	return 0;
}


/*
 * Thread context switch IN
 */

static void
pmc_process_csw_in(struct thread *td)
{
	int cpu;
	unsigned int ri;
	struct pmc *pm;
	struct proc *p;
	struct pmc_cpu *pc;
	struct pmc_hw *phw;
	struct pmc_process *pp;
	pmc_value_t newvalue;

	p = td->td_proc;

	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
		return;

	KASSERT(pp->pp_proc == td->td_proc,
	    ("[pmc,%d] not my thread state", __LINE__));

	critical_enter(); /* no preemption from this point */

	cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */

	PMCDBG(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
	    p->p_pid, p->p_comm, pp);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[pmc,%d] weird CPU id %d", __LINE__, cpu));

	pc = pmc_pcpu[cpu];

	for (ri = 0; ri < md->pmd_npmc; ri++) {

		if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
			continue;

		KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
		    ("[pmc,%d] Target PMC in non-virtual mode (%d)",
			__LINE__, PMC_TO_MODE(pm)));

		KASSERT(PMC_TO_ROWINDEX(pm) == ri,
		    ("[pmc,%d] Row index mismatch pmc %d != ri %d",
			__LINE__, PMC_TO_ROWINDEX(pm), ri));

		/*
		 * Only PMCs that are marked as 'RUNNING' need
		 * be placed on hardware.
		 */

		if (pm->pm_state != PMC_STATE_RUNNING)
			continue;

		/* increment PMC runcount */
		atomic_add_rel_32(&pm->pm_runcount, 1);

		/* configure the HWPMC we are going to use. */
		md->pmd_config_pmc(cpu, ri, pm);

		phw = pc->pc_hwpmcs[ri];

		KASSERT(phw != NULL,
		    ("[pmc,%d] null hw pointer", __LINE__));

		KASSERT(phw->phw_pmc == pm,
		    ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__,
			phw->phw_pmc, pm));

		/*
		 * Write out saved value and start the PMC.
		 *
		 * Sampling PMCs use a per-process value, while
		 * counting mode PMCs use a per-pmc value that is
		 * inherited across descendants.
		 */
		if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
			mtx_pool_lock_spin(pmc_mtxpool, pm);
			newvalue = PMC_PCPU_SAVED(cpu,ri) =
			    pp->pp_pmcs[ri].pp_pmcval;
			mtx_pool_unlock_spin(pmc_mtxpool, pm);
		} else {
			KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
			    ("[pmc,%d] illegal mode=%d", __LINE__,
			    PMC_TO_MODE(pm)));
			mtx_pool_lock_spin(pmc_mtxpool, pm);
			newvalue = PMC_PCPU_SAVED(cpu, ri) =
			    pm->pm_gv.pm_savedvalue;
			mtx_pool_unlock_spin(pmc_mtxpool, pm);
		}

		PMCDBG(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue);

		md->pmd_write_pmc(cpu, ri, newvalue);
		md->pmd_start_pmc(cpu, ri);
	}

	/*
	 * perform any other architecture/cpu dependent thread
	 * switch-in actions.
	 */

	(void) (*md->pmd_switch_in)(pc, pp);

	critical_exit();

}
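/*
 * (Both context switch handlers run inside a critical section: they
 * index per-CPU state via PCPU_GET(cpuid), which must not change
 * under them, and td->td_oncpu is not usable at this point in the
 * switch path.)
 */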
/*
 * Thread context switch OUT.
 */

static void
pmc_process_csw_out(struct thread *td)
{
	int cpu;
	enum pmc_mode mode;
	unsigned int ri;
	struct pmc *pm;
	struct proc *p;
	struct pmc_cpu *pc;
	struct pmc_process *pp;
	int64_t tmp;
	pmc_value_t newvalue;

	/*
	 * Locate our process descriptor; this may be NULL if
	 * this process is exiting and we have already removed
	 * the process from the target process table.
	 *
	 * Note that due to kernel preemption, multiple
	 * context switches may happen while the process is
	 * exiting.
	 *
	 * Note also that if the target process cannot be
	 * found we still need to deconfigure any PMCs that
	 * are currently running on hardware.
	 */

	p = td->td_proc;
	pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);

	/*
	 * save PMCs
	 */

	critical_enter();

	cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */

	PMCDBG(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
	    p->p_pid, p->p_comm, pp);

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[pmc,%d] weird CPU id %d", __LINE__, cpu));

	pc = pmc_pcpu[cpu];

	/*
	 * When a PMC gets unlinked from a target PMC, it will
	 * be removed from the target's pp_pmc[] array.
	 *
	 * However, on a MP system, the target could have been
	 * executing on another CPU at the time of the unlink.
	 * So, at context switch OUT time, we need to look at
	 * the hardware to determine if a PMC is scheduled on
	 * it.
	 */

	for (ri = 0; ri < md->pmd_npmc; ri++) {

		pm = NULL;
		(void) (*md->pmd_get_config)(cpu, ri, &pm);

		if (pm == NULL)	/* nothing at this row index */
			continue;

		mode = PMC_TO_MODE(pm);
		if (!PMC_IS_VIRTUAL_MODE(mode))
			continue; /* not a process virtual PMC */

		KASSERT(PMC_TO_ROWINDEX(pm) == ri,
		    ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
			__LINE__, PMC_TO_ROWINDEX(pm), ri));

		/* Stop hardware if not already stopped */
		if (pm->pm_stalled == 0)
			md->pmd_stop_pmc(cpu, ri);

		/* reduce this PMC's runcount */
		atomic_subtract_rel_32(&pm->pm_runcount, 1);

		/*
		 * If this PMC is associated with this process,
		 * save the reading.
		 */

		if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) {

			KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
			    ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__,
				pm, ri, pp->pp_pmcs[ri].pp_pmc));

			KASSERT(pp->pp_refcnt > 0,
			    ("[pmc,%d] pp refcnt = %d", __LINE__,
				pp->pp_refcnt));

			md->pmd_read_pmc(cpu, ri, &newvalue);

			tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);

			PMCDBG(CSW,SWI,1,"cpu=%d ri=%d tmp=%jd", cpu, ri,
			    tmp);

			if (mode == PMC_MODE_TS) {

				/*
				 * For sampling process-virtual PMCs,
				 * we expect the count to be
				 * decreasing as the 'value'
				 * programmed into the PMC is the
				 * number of events to be seen until
				 * the next sampling interrupt.
				 */
				if (tmp < 0)
					tmp += pm->pm_sc.pm_reloadcount;
				mtx_pool_lock_spin(pmc_mtxpool, pm);
				pp->pp_pmcs[ri].pp_pmcval -= tmp;
				if ((int64_t) pp->pp_pmcs[ri].pp_pmcval < 0)
					pp->pp_pmcs[ri].pp_pmcval +=
					    pm->pm_sc.pm_reloadcount;
				mtx_pool_unlock_spin(pmc_mtxpool, pm);

			} else {

				/*
				 * For counting process-virtual PMCs,
				 * we expect the count to be
				 * increasing monotonically, modulo a 64
				 * bit wraparound.
				 */
				KASSERT((int64_t) tmp >= 0,
				    ("[pmc,%d] negative increment cpu=%d "
				     "ri=%d newvalue=%jx saved=%jx "
				     "incr=%jx", __LINE__, cpu, ri,
				     newvalue, PMC_PCPU_SAVED(cpu,ri), tmp));

				mtx_pool_lock_spin(pmc_mtxpool, pm);
				pm->pm_gv.pm_savedvalue += tmp;
				pp->pp_pmcs[ri].pp_pmcval += tmp;
				mtx_pool_unlock_spin(pmc_mtxpool, pm);

				if (pm->pm_flags & PMC_F_LOG_PROCCSW)
					pmclog_process_proccsw(pm, pp, tmp);
			}
		}

		/* mark hardware as free */
		md->pmd_config_pmc(cpu, ri, NULL);
	}

	/*
	 * perform any other architecture/cpu dependent thread
	 * switch out functions.
	 */

	(void) (*md->pmd_switch_out)(pc, pp);

	critical_exit();
}

/*
 * Log a KLD operation.
 */

static void
pmc_process_kld_load(struct pmckern_map_in *pkm)
{
	struct pmc_owner *po;

	sx_assert(&pmc_sx, SX_LOCKED);

	/*
	 * Notify owners of system sampling PMCs about KLD operations.
	 */

	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		    pmclog_process_map_in(po, (pid_t) -1, pkm->pm_address,
			(char *) pkm->pm_file);

	/*
	 * TODO: Notify owners of (all) process-sampling PMCs too.
	 */

	return;
}

static void
pmc_process_kld_unload(struct pmckern_map_out *pkm)
{
	struct pmc_owner *po;

	sx_assert(&pmc_sx, SX_LOCKED);

	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		    pmclog_process_map_out(po, (pid_t) -1,
			pkm->pm_address, pkm->pm_address + pkm->pm_size);

	/*
	 * TODO: Notify owners of process-sampling PMCs.
	 */
}

/*
 * A mapping change for a process.
 */

static void
pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm)
{
	int ri;
	pid_t pid;
	char *fullpath, *freepath;
	const struct pmc *pm;
	struct pmc_owner *po;
	const struct pmc_process *pp;

	freepath = fullpath = NULL;
	pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);

	pid = td->td_proc->p_pid;

	/* Inform owners of all system-wide sampling PMCs. */
	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		    pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);

	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
		goto done;

	/*
	 * Inform sampling PMC owners tracking this process.
	 */
	for (ri = 0; ri < md->pmd_npmc; ri++)
		if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
		    PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			pmclog_process_map_in(pm->pm_owner,
			    pid, pkm->pm_address, fullpath);

  done:
	if (freepath)
		free(freepath, M_TEMP);
}


/*
 * Log an munmap request.
 */
static void
pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
{
	int ri;
	pid_t pid;
	struct pmc_owner *po;
	const struct pmc *pm;
	const struct pmc_process *pp;

	pid = td->td_proc->p_pid;

	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
		    pmclog_process_map_out(po, pid, pkm->pm_address,
			pkm->pm_address + pkm->pm_size);

	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
		return;

	for (ri = 0; ri < md->pmd_npmc; ri++)
		if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL &&
		    PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			pmclog_process_map_out(pm->pm_owner, pid,
			    pkm->pm_address, pkm->pm_address + pkm->pm_size);
}

/*
 * Log mapping information about the kernel.
 */

static void
pmc_log_kernel_mappings(struct pmc *pm)
{
	struct pmc_owner *po;
	struct pmckern_map_in *km, *kmbase;

	sx_assert(&pmc_sx, SX_LOCKED);
	KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
	    ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
		__LINE__, (void *) pm));

	po = pm->pm_owner;

	if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE)
		return;

	/*
	 * Log the current set of kernel modules.
	 */
	kmbase = linker_hwpmc_list_objects();
	for (km = kmbase; km->pm_file != NULL; km++) {
		PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
		    (void *) km->pm_address);
		pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
		    km->pm_file);
	}
	free(kmbase, M_LINKER);

	po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE;
}

/*
 * Log the mappings for a single process.
 */

static void
pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
{
}
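/*
 * (pmc_log_process_mappings() above is an empty stub: per-process
 * mapping records are not yet emitted here, although its callers,
 * pmc_attach_one_process() and pmc_log_all_process_mappings(),
 * already invoke it.)
 */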
/*
 * Log mappings for all processes in the system.
 */

static void
pmc_log_all_process_mappings(struct pmc_owner *po)
{
	struct proc *p, *top;

	sx_assert(&pmc_sx, SX_XLOCKED);

	if ((p = pfind(1)) == NULL)
		panic("[pmc,%d] Cannot find init", __LINE__);

	PROC_UNLOCK(p);

	sx_slock(&proctree_lock);

	top = p;

	for (;;) {
		pmc_log_process_mappings(po, p);
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				goto done;
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
 done:
	sx_sunlock(&proctree_lock);
}

/*
 * The 'hook' invoked from the kernel proper
 */


#ifdef	DEBUG
const char *pmc_hooknames[] = {
	/* these strings correspond to PMC_FN_* in <sys/pmckern.h> */
	"",
	"EXEC",
	"CSW-IN",
	"CSW-OUT",
	"SAMPLE",
	"KLDLOAD",
	"KLDUNLOAD",
	"MMAP",
	"MUNMAP",
	"CALLCHAIN"
};
#endif

static int
pmc_hook_handler(struct thread *td, int function, void *arg)
{

	PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
	    pmc_hooknames[function], arg);

	switch (function)
	{

	/*
	 * Process exec()
	 */

	case PMC_FN_PROCESS_EXEC:
	{
		char *fullpath, *freepath;
		unsigned int ri;
		int is_using_hwpmcs;
		struct pmc *pm;
		struct proc *p;
		struct pmc_owner *po;
		struct pmc_process *pp;
		struct pmckern_procexec *pk;

		sx_assert(&pmc_sx, SX_XLOCKED);

		p = td->td_proc;
		pmc_getfilename(p->p_textvp, &fullpath, &freepath);

		pk = (struct pmckern_procexec *) arg;

		/* Inform owners of SS mode PMCs of the exec event. */
		LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
		    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
			    pmclog_process_procexec(po, PMC_ID_INVALID,
				p->p_pid, pk->pm_entryaddr, fullpath);

		PROC_LOCK(p);
		is_using_hwpmcs = p->p_flag & P_HWPMC;
		PROC_UNLOCK(p);

		if (!is_using_hwpmcs) {
			if (freepath)
				free(freepath, M_TEMP);
			break;
		}

		/*
		 * PMCs are not inherited across an exec(): remove any
		 * PMCs that this process is the owner of.
		 */

		if ((po = pmc_find_owner_descriptor(p)) != NULL) {
			pmc_remove_owner(po);
			pmc_destroy_owner_descriptor(po);
		}

		/*
		 * If the process being exec'ed is not the target of any
		 * PMC, we are done.
		 */
		if ((pp = pmc_find_process_descriptor(p, 0)) == NULL) {
			if (freepath)
				free(freepath, M_TEMP);
			break;
		}

		/*
		 * Log the exec event to all monitoring owners.  Skip
		 * owners who have already received the event because
		 * they had system sampling PMCs active.
		 */
		for (ri = 0; ri < md->pmd_npmc; ri++)
			if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
				po = pm->pm_owner;
				if (po->po_sscount == 0 &&
				    po->po_flags & PMC_PO_OWNS_LOGFILE)
					pmclog_process_procexec(po, pm->pm_id,
					    p->p_pid, pk->pm_entryaddr,
					    fullpath);
			}

		if (freepath)
			free(freepath, M_TEMP);


		PMCDBG(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d",
		    p, p->p_pid, p->p_comm, pk->pm_credentialschanged);

		if (pk->pm_credentialschanged == 0) /* no change */
			break;

		/*
		 * If the newly exec()'ed process has a different credential
		 * than before, allow it to be the target of a PMC only if
		 * the PMC's owner has sufficient privilege.
		 */

		for (ri = 0; ri < md->pmd_npmc; ri++)
			if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL)
				if (pmc_can_attach(pm, td->td_proc) != 0)
					pmc_detach_one_process(td->td_proc,
					    pm, PMC_FLAG_NONE);

		KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < (int) md->pmd_npmc,
		    ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__,
			pp->pp_refcnt, pp));

		/*
		 * If this process is no longer the target of any
		 * PMCs, we can remove the process entry and free
		 * up space.
		 */

		if (pp->pp_refcnt == 0) {
			pmc_remove_process_descriptor(pp);
			free(pp, M_PMC);
			break;
		}

	}
	break;

	case PMC_FN_CSW_IN:
		pmc_process_csw_in(td);
		break;

	case PMC_FN_CSW_OUT:
		pmc_process_csw_out(td);
		break;

	/*
	 * Process accumulated PC samples.
	 *
	 * This function is expected to be called by hardclock() for
	 * each CPU that has accumulated PC samples.
	 *
	 * This function is to be executed on the CPU whose samples
	 * are being processed.
	 */
	case PMC_FN_DO_SAMPLES:

		/*
		 * Clear the CPU-specific bit in the CPU mask before
		 * doing the rest of the processing.  If the NMI handler
		 * gets invoked after the "atomic_clear_int()" call
		 * below but before "pmc_process_samples()" gets
		 * around to processing the interrupt, then we will
		 * come back here at the next hardclock() tick (and
		 * may find nothing to do if "pmc_process_samples()"
		 * had already processed the interrupt).  We don't
		 * lose the interrupt sample.
		 */
		atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid)));
		pmc_process_samples(PCPU_GET(cpuid));
		break;


	case PMC_FN_KLD_LOAD:
		sx_assert(&pmc_sx, SX_LOCKED);
		pmc_process_kld_load((struct pmckern_map_in *) arg);
		break;

	case PMC_FN_KLD_UNLOAD:
		sx_assert(&pmc_sx, SX_LOCKED);
		pmc_process_kld_unload((struct pmckern_map_out *) arg);
		break;

	case PMC_FN_MMAP:
		sx_assert(&pmc_sx, SX_LOCKED);
		pmc_process_mmap(td, (struct pmckern_map_in *) arg);
		break;

	case PMC_FN_MUNMAP:
		sx_assert(&pmc_sx, SX_LOCKED);
		pmc_process_munmap(td, (struct pmckern_map_out *) arg);
		break;

	case PMC_FN_USER_CALLCHAIN:
		/*
		 * Record a call chain.
		 */
		pmc_capture_user_callchain(PCPU_GET(cpuid),
		    (struct trapframe *) arg);
		break;

	default:
#ifdef	DEBUG
		KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
#endif
		break;

	}

	return 0;
}

/*
 * allocate a 'struct pmc_owner' descriptor in the owner hash table.
 */
static struct pmc_owner *
pmc_allocate_owner_descriptor(struct proc *p)
{
	uint32_t hindex;
	struct pmc_owner *po;
	struct pmc_ownerhash *poh;

	hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
	poh = &pmc_ownerhash[hindex];

	/* allocate space for N pointers and one descriptor struct */
	po = malloc(sizeof(struct pmc_owner), M_PMC, M_ZERO|M_WAITOK);

	po->po_sscount = po->po_error = po->po_flags = 0;
	po->po_file  = NULL;
	po->po_owner = p;
	po->po_kthread = NULL;
	LIST_INIT(&po->po_pmcs);
	LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */

	TAILQ_INIT(&po->po_logbuffers);
	mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc-per-proc", MTX_SPIN);

	PMCDBG(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p",
	    p, p->p_pid, p->p_comm, po);

	return po;
}

static void
pmc_destroy_owner_descriptor(struct pmc_owner *po)
{

	PMCDBG(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)",
	    po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm);

	mtx_destroy(&po->po_mtx);
	free(po, M_PMC);
}

/*
 * find the descriptor corresponding to process 'p', adding or removing it
 * as specified by 'mode'.
 */

static struct pmc_process *
pmc_find_process_descriptor(struct proc *p, uint32_t mode)
{
	uint32_t hindex;
	struct pmc_process *pp, *ppnew;
	struct pmc_processhash *pph;

	hindex = PMC_HASH_PTR(p, pmc_processhashmask);
	pph = &pmc_processhash[hindex];

	ppnew = NULL;

	/*
	 * Pre-allocate memory in the FIND_ALLOCATE case since we
	 * cannot call malloc(9) once we hold a spin lock.
	 */

	if (mode & PMC_FLAG_ALLOCATE) {
		/* allocate additional space for 'n' pmc pointers */
		ppnew = malloc(sizeof(struct pmc_process) + md->pmd_npmc *
		    sizeof(struct pmc_targetstate), M_PMC, M_ZERO|M_WAITOK);
	}

	mtx_lock_spin(&pmc_processhash_mtx);
	LIST_FOREACH(pp, pph, pp_next)
	    if (pp->pp_proc == p)
		    break;

	if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
		LIST_REMOVE(pp, pp_next);

	if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL &&
	    ppnew != NULL) {
		ppnew->pp_proc = p;
		LIST_INSERT_HEAD(pph, ppnew, pp_next);
		pp = ppnew;
		ppnew = NULL;
	}
	mtx_unlock_spin(&pmc_processhash_mtx);

	if (pp != NULL && ppnew != NULL)
		free(ppnew, M_PMC);

	return pp;
}
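/*
 * (The allocate-before-locking dance above is deliberate: malloc(9)
 * with M_WAITOK may sleep, which is illegal while holding the
 * pmc_processhash_mtx spin mutex, so the descriptor is allocated
 * first and simply freed again if the lookup finds an existing one.)
 */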
/*
 * remove a process descriptor from the process hash table.
 */

static void
pmc_remove_process_descriptor(struct pmc_process *pp)
{
	KASSERT(pp->pp_refcnt == 0,
	    ("[pmc,%d] Removing process descriptor %p with count %d",
		__LINE__, pp, pp->pp_refcnt));

	mtx_lock_spin(&pmc_processhash_mtx);
	LIST_REMOVE(pp, pp_next);
	mtx_unlock_spin(&pmc_processhash_mtx);
}


/*
 * find an owner descriptor corresponding to proc 'p'
 */

static struct pmc_owner *
pmc_find_owner_descriptor(struct proc *p)
{
	uint32_t hindex;
	struct pmc_owner *po;
	struct pmc_ownerhash *poh;

	hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
	poh = &pmc_ownerhash[hindex];

	po = NULL;
	LIST_FOREACH(po, poh, po_next)
	    if (po->po_owner == p)
		    break;

	PMCDBG(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> "
	    "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po);

	return po;
}

/*
 * pmc_allocate_pmc_descriptor
 *
 * Allocate a pmc descriptor and initialize its
 * fields.
 */

static struct pmc *
pmc_allocate_pmc_descriptor(void)
{
	struct pmc *pmc;

	pmc = malloc(sizeof(struct pmc), M_PMC, M_ZERO|M_WAITOK);

	if (pmc != NULL) {
		pmc->pm_owner = NULL;
		LIST_INIT(&pmc->pm_targets);
	}

	PMCDBG(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);

	return pmc;
}

/*
 * Destroy a pmc descriptor.
 */

static void
pmc_destroy_pmc_descriptor(struct pmc *pm)
{
	(void) pm;

#ifdef	DEBUG
	KASSERT(pm->pm_state == PMC_STATE_DELETED ||
	    pm->pm_state == PMC_STATE_FREE,
	    ("[pmc,%d] destroying non-deleted PMC", __LINE__));
	KASSERT(LIST_EMPTY(&pm->pm_targets),
	    ("[pmc,%d] destroying pmc with targets", __LINE__));
	KASSERT(pm->pm_owner == NULL,
	    ("[pmc,%d] destroying pmc attached to an owner", __LINE__));
	KASSERT(pm->pm_runcount == 0,
	    ("[pmc,%d] pmc has non-zero run count %d", __LINE__,
		pm->pm_runcount));
#endif
}

static void
pmc_wait_for_pmc_idle(struct pmc *pm)
{
#ifdef	DEBUG
	volatile int maxloop;

	maxloop = 100 * pmc_cpu_max();
#endif

	/*
	 * Loop (with a forced context switch) till the PMC's runcount
	 * comes down to zero.
	 */
	while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
#ifdef	DEBUG
		maxloop--;
		KASSERT(maxloop > 0,
		    ("[pmc,%d] (ri%d, rc%d) waiting too long for "
		     "pmc to be free", __LINE__,
		     PMC_TO_ROWINDEX(pm), pm->pm_runcount));
#endif
		pmc_force_context_switch();
	}
}
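/*
 * (pmc_wait_for_pmc_idle() above implements the "wait for the runcount
 * to reach zero" step described in the Concurrency Control notes: the
 * caller has already moved the PMC to a non-schedulable state, so the
 * runcount can only decrease, and each pause() gives other CPUs a
 * chance to switch the PMC out.)
 */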
2066 */ 2067 2068 static void 2069 pmc_release_pmc_descriptor(struct pmc *pm) 2070 { 2071 u_int ri, cpu; 2072 enum pmc_mode mode; 2073 struct pmc_hw *phw; 2074 struct pmc_owner *po; 2075 struct pmc_process *pp; 2076 struct pmc_target *ptgt, *tmp; 2077 struct pmc_binding pb; 2078 2079 sx_assert(&pmc_sx, SX_XLOCKED); 2080 2081 KASSERT(pm, ("[pmc,%d] null pmc", __LINE__)); 2082 2083 ri = PMC_TO_ROWINDEX(pm); 2084 mode = PMC_TO_MODE(pm); 2085 2086 PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri, 2087 mode); 2088 2089 /* 2090 * First, we take the PMC off hardware. 2091 */ 2092 cpu = 0; 2093 if (PMC_IS_SYSTEM_MODE(mode)) { 2094 2095 /* 2096 * A system mode PMC runs on a specific CPU. Switch 2097 * to this CPU and turn hardware off. 2098 */ 2099 pmc_save_cpu_binding(&pb); 2100 2101 cpu = PMC_TO_CPU(pm); 2102 2103 pmc_select_cpu(cpu); 2104 2105 /* switch off non-stalled CPUs */ 2106 if (pm->pm_state == PMC_STATE_RUNNING && 2107 pm->pm_stalled == 0) { 2108 2109 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; 2110 2111 KASSERT(phw->phw_pmc == pm, 2112 ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)", 2113 __LINE__, ri, phw->phw_pmc, pm)); 2114 PMCDBG(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri); 2115 2116 critical_enter(); 2117 md->pmd_stop_pmc(cpu, ri); 2118 critical_exit(); 2119 } 2120 2121 PMCDBG(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri); 2122 2123 critical_enter(); 2124 md->pmd_config_pmc(cpu, ri, NULL); 2125 critical_exit(); 2126 2127 /* adjust the global and process count of SS mode PMCs */ 2128 if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) { 2129 po = pm->pm_owner; 2130 po->po_sscount--; 2131 if (po->po_sscount == 0) { 2132 atomic_subtract_rel_int(&pmc_ss_count, 1); 2133 LIST_REMOVE(po, po_ssnext); 2134 } 2135 } 2136 2137 pm->pm_state = PMC_STATE_DELETED; 2138 2139 pmc_restore_cpu_binding(&pb); 2140 2141 /* 2142 * We could have references to this PMC structure in 2143 * the per-cpu sample queues. Wait for the queue to 2144 * drain. 2145 */ 2146 pmc_wait_for_pmc_idle(pm); 2147 2148 } else if (PMC_IS_VIRTUAL_MODE(mode)) { 2149 2150 /* 2151 * A virtual PMC could be running on multiple CPUs at 2152 * a given instant. 2153 * 2154 * By marking its state as DELETED, we ensure that 2155 * this PMC is never further scheduled on hardware. 2156 * 2157 * Then we wait till all CPUs are done with this PMC. 2158 */ 2159 pm->pm_state = PMC_STATE_DELETED; 2160 2161 2162 /* Wait for the PMCs runcount to come to zero. */ 2163 pmc_wait_for_pmc_idle(pm); 2164 2165 /* 2166 * At this point the PMC is off all CPUs and cannot be 2167 * freshly scheduled onto a CPU. It is now safe to 2168 * unlink all targets from this PMC. If a 2169 * process-record's refcount falls to zero, we remove 2170 * it from the hash table. The module-wide SX lock 2171 * protects us from races. 2172 */ 2173 LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) { 2174 pp = ptgt->pt_process; 2175 pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */ 2176 2177 PMCDBG(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt); 2178 2179 /* 2180 * If the target process record shows that no 2181 * PMCs are attached to it, reclaim its space. 
2182 */ 2183 2184 if (pp->pp_refcnt == 0) { 2185 pmc_remove_process_descriptor(pp); 2186 free(pp, M_PMC); 2187 } 2188 } 2189 2190 cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */ 2191 2192 } 2193 2194 /* 2195 * Release any MD resources 2196 */ 2197 2198 (void) md->pmd_release_pmc(cpu, ri, pm); 2199 2200 /* 2201 * Update row disposition 2202 */ 2203 2204 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) 2205 PMC_UNMARK_ROW_STANDALONE(ri); 2206 else 2207 PMC_UNMARK_ROW_THREAD(ri); 2208 2209 /* unlink from the owner's list */ 2210 if (pm->pm_owner) { 2211 LIST_REMOVE(pm, pm_next); 2212 pm->pm_owner = NULL; 2213 } 2214 2215 pmc_destroy_pmc_descriptor(pm); 2216 } 2217 2218 /* 2219 * Register an owner and a pmc. 2220 */ 2221 2222 static int 2223 pmc_register_owner(struct proc *p, struct pmc *pmc) 2224 { 2225 struct pmc_owner *po; 2226 2227 sx_assert(&pmc_sx, SX_XLOCKED); 2228 2229 if ((po = pmc_find_owner_descriptor(p)) == NULL) 2230 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) 2231 return ENOMEM; 2232 2233 KASSERT(pmc->pm_owner == NULL, 2234 ("[pmc,%d] attempting to own an initialized PMC", __LINE__)); 2235 pmc->pm_owner = po; 2236 2237 LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next); 2238 2239 PROC_LOCK(p); 2240 p->p_flag |= P_HWPMC; 2241 PROC_UNLOCK(p); 2242 2243 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 2244 pmclog_process_pmcallocate(pmc); 2245 2246 PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p", 2247 po, pmc); 2248 2249 return 0; 2250 } 2251 2252 /* 2253 * Return the current row disposition: 2254 * == 0 => FREE 2255 * > 0 => PROCESS MODE 2256 * < 0 => SYSTEM MODE 2257 */ 2258 2259 int 2260 pmc_getrowdisp(int ri) 2261 { 2262 return pmc_pmcdisp[ri]; 2263 } 2264 2265 /* 2266 * Check if a PMC at row index 'ri' can be allocated to the current 2267 * process. 2268 * 2269 * Allocation can fail if: 2270 * - the current process is already being profiled by a PMC at index 'ri', 2271 * attached to it via OP_PMCATTACH. 2272 * - the current process has already allocated a PMC at index 'ri' 2273 * via OP_ALLOCATE. 2274 */ 2275 2276 static int 2277 pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) 2278 { 2279 enum pmc_mode mode; 2280 struct pmc *pm; 2281 struct pmc_owner *po; 2282 struct pmc_process *pp; 2283 2284 PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " 2285 "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); 2286 2287 /* 2288 * We shouldn't have already allocated a process-mode PMC at 2289 * row index 'ri'. 2290 * 2291 * We shouldn't have allocated a system-wide PMC on the same 2292 * CPU and same RI. 2293 */ 2294 if ((po = pmc_find_owner_descriptor(p)) != NULL) 2295 LIST_FOREACH(pm, &po->po_pmcs, pm_next) { 2296 if (PMC_TO_ROWINDEX(pm) == ri) { 2297 mode = PMC_TO_MODE(pm); 2298 if (PMC_IS_VIRTUAL_MODE(mode)) 2299 return EEXIST; 2300 if (PMC_IS_SYSTEM_MODE(mode) && 2301 (int) PMC_TO_CPU(pm) == cpu) 2302 return EEXIST; 2303 } 2304 } 2305 2306 /* 2307 * We also shouldn't be the target of any PMC at this index 2308 * since otherwise a PMC_ATTACH to ourselves will fail. 2309 */ 2310 if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) 2311 if (pp->pp_pmcs[ri].pp_pmc) 2312 return EEXIST; 2313 2314 PMCDBG(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", 2315 p, p->p_pid, p->p_comm, ri); 2316 2317 return 0; 2318 } 2319 2320 /* 2321 * Check if a given PMC at row index 'ri' can be currently used in 2322 * mode 'mode'. 
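 *
 * For example, while any process-virtual PMC occupies row 'ri' (so
 * the row's disposition is THREAD), a request to use the same row
 * in a system-wide mode fails with EBUSY until those allocations
 * are released; the full decision table appears in the function
 * body below.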
2323 */ 2324 2325 static int 2326 pmc_can_allocate_row(int ri, enum pmc_mode mode) 2327 { 2328 enum pmc_disp disp; 2329 2330 sx_assert(&pmc_sx, SX_XLOCKED); 2331 2332 PMCDBG(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); 2333 2334 if (PMC_IS_SYSTEM_MODE(mode)) 2335 disp = PMC_DISP_STANDALONE; 2336 else 2337 disp = PMC_DISP_THREAD; 2338 2339 /* 2340 * check disposition for PMC row 'ri': 2341 * 2342 * Expected disposition Row-disposition Result 2343 * 2344 * STANDALONE STANDALONE or FREE proceed 2345 * STANDALONE THREAD fail 2346 * THREAD THREAD or FREE proceed 2347 * THREAD STANDALONE fail 2348 */ 2349 2350 if (!PMC_ROW_DISP_IS_FREE(ri) && 2351 !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && 2352 !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) 2353 return EBUSY; 2354 2355 /* 2356 * All OK 2357 */ 2358 2359 PMCDBG(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); 2360 2361 return 0; 2362 2363 } 2364 2365 /* 2366 * Find a PMC descriptor with user handle 'pmcid' for thread 'td'. 2367 */ 2368 2369 static struct pmc * 2370 pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) 2371 { 2372 struct pmc *pm; 2373 2374 KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, 2375 ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, 2376 PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); 2377 2378 LIST_FOREACH(pm, &po->po_pmcs, pm_next) 2379 if (pm->pm_id == pmcid) 2380 return pm; 2381 2382 return NULL; 2383 } 2384 2385 static int 2386 pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) 2387 { 2388 2389 struct pmc *pm; 2390 struct pmc_owner *po; 2391 2392 PMCDBG(PMC,FND,1, "find-pmc id=%d", pmcid); 2393 2394 if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) 2395 return ESRCH; 2396 2397 if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) 2398 return EINVAL; 2399 2400 PMCDBG(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); 2401 2402 *pmc = pm; 2403 return 0; 2404 } 2405 2406 /* 2407 * Start a PMC. 2408 */ 2409 2410 static int 2411 pmc_start(struct pmc *pm) 2412 { 2413 int error, cpu, ri; 2414 enum pmc_mode mode; 2415 struct pmc_owner *po; 2416 struct pmc_binding pb; 2417 2418 KASSERT(pm != NULL, 2419 ("[pmc,%d] null pm", __LINE__)); 2420 2421 mode = PMC_TO_MODE(pm); 2422 ri = PMC_TO_ROWINDEX(pm); 2423 error = 0; 2424 2425 PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); 2426 2427 po = pm->pm_owner; 2428 2429 /* 2430 * Disallow PMCSTART if a logfile is required but has not been 2431 * configured yet. 2432 */ 2433 if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && 2434 (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) 2435 return EDOOFUS; /* programming error */ 2436 2437 /* 2438 * If this is a sampling mode PMC, log mapping information for 2439 * the kernel modules that are currently loaded. 2440 */ 2441 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 2442 pmc_log_kernel_mappings(pm); 2443 2444 if (PMC_IS_VIRTUAL_MODE(mode)) { 2445 2446 /* 2447 * If a PMCATTACH has never been done on this PMC, 2448 * attach it to its owner process. 2449 */ 2450 2451 if (LIST_EMPTY(&pm->pm_targets)) 2452 error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH : 2453 pmc_attach_process(po->po_owner, pm); 2454 2455 /* 2456 * If the PMC is attached to its owner, then force a context 2457 * switch to ensure that the MD state gets set correctly. 
2458 */
2459
2460 if (error == 0) {
2461 pm->pm_state = PMC_STATE_RUNNING;
2462 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER)
2463 pmc_force_context_switch();
2464 }
2465
2466 return error;
2467 }
2468
2469
2470 /*
2471 * A system-wide PMC.
2472 *
2473 * Add the owner to the global list if this is a system-wide
2474 * sampling PMC.
2475 */
2476
2477 if (mode == PMC_MODE_SS) {
2478 if (po->po_sscount == 0) {
2479 LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
2480 atomic_add_rel_int(&pmc_ss_count, 1);
2481 PMCDBG(PMC,OPS,1, "po=%p in global list", po);
2482 }
2483 po->po_sscount++;
2484 }
2485
2486 /* Log mapping information for all processes in the system. */
2487 pmc_log_all_process_mappings(po);
2488
2489 /*
2490 * Move to the CPU associated with this
2491 * PMC, and start the hardware.
2492 */
2493
2494 pmc_save_cpu_binding(&pb);
2495
2496 cpu = PMC_TO_CPU(pm);
2497
2498 if (!pmc_cpu_is_active(cpu))
2499 return ENXIO;
2500
2501 pmc_select_cpu(cpu);
2502
2503 /*
2504 * global PMCs are configured at allocation time
2505 * so write out the initial value and start the PMC.
2506 */
2507
2508 pm->pm_state = PMC_STATE_RUNNING;
2509
2510 critical_enter();
2511 if ((error = md->pmd_write_pmc(cpu, ri,
2512 PMC_IS_SAMPLING_MODE(mode) ?
2513 pm->pm_sc.pm_reloadcount :
2514 pm->pm_sc.pm_initial)) == 0)
2515 error = md->pmd_start_pmc(cpu, ri);
2516 critical_exit();
2517
2518 pmc_restore_cpu_binding(&pb);
2519
2520 return error;
2521 }
2522
2523 /*
2524 * Stop a PMC.
2525 */
2526
2527 static int
2528 pmc_stop(struct pmc *pm)
2529 {
2530 int cpu, error, ri;
2531 struct pmc_owner *po;
2532 struct pmc_binding pb;
2533
2534 KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__));
2535
2536 PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm,
2537 PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm));
2538
2539 pm->pm_state = PMC_STATE_STOPPED;
2540
2541 /*
2542 * If the PMC is a virtual mode one, changing the state to
2543 * non-RUNNING is enough to ensure that the PMC never gets
2544 * scheduled.
2545 *
2546 * If this PMC is currently running on a CPU, then it will be
2547 * handled correctly at the time its target process is context
2548 * switched out.
2549 */
2550
2551 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
2552 return 0;
2553
2554 /*
2555 * A system-mode PMC. Move to the CPU associated with
2556 * this PMC, and stop the hardware. We update the
2557 * 'initial count' so that a subsequent PMCSTART will
2558 * resume counting from the current hardware count.
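 *
 * In outline, the code below performs (illustrative sketch only):
 *
 *	md->pmd_stop_pmc(cpu, ri);
 *	md->pmd_read_pmc(cpu, ri, &pm->pm_sc.pm_initial);
 *
 * so that, for a counting-mode PMC, the next pmc_start() writes
 * 'pm_initial' back to the hardware before restarting it.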
2559 */ 2560 2561 pmc_save_cpu_binding(&pb); 2562 2563 cpu = PMC_TO_CPU(pm); 2564 2565 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 2566 ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); 2567 2568 if (!pmc_cpu_is_active(cpu)) 2569 return ENXIO; 2570 2571 pmc_select_cpu(cpu); 2572 2573 ri = PMC_TO_ROWINDEX(pm); 2574 2575 critical_enter(); 2576 if ((error = md->pmd_stop_pmc(cpu, ri)) == 0) 2577 error = md->pmd_read_pmc(cpu, ri, &pm->pm_sc.pm_initial); 2578 critical_exit(); 2579 2580 pmc_restore_cpu_binding(&pb); 2581 2582 po = pm->pm_owner; 2583 2584 /* remove this owner from the global list of SS PMC owners */ 2585 if (PMC_TO_MODE(pm) == PMC_MODE_SS) { 2586 po->po_sscount--; 2587 if (po->po_sscount == 0) { 2588 atomic_subtract_rel_int(&pmc_ss_count, 1); 2589 LIST_REMOVE(po, po_ssnext); 2590 PMCDBG(PMC,OPS,2,"po=%p removed from global list", po); 2591 } 2592 } 2593 2594 return error; 2595 } 2596 2597 2598 #ifdef DEBUG 2599 static const char *pmc_op_to_name[] = { 2600 #undef __PMC_OP 2601 #define __PMC_OP(N, D) #N , 2602 __PMC_OPS() 2603 NULL 2604 }; 2605 #endif 2606 2607 /* 2608 * The syscall interface 2609 */ 2610 2611 #define PMC_GET_SX_XLOCK(...) do { \ 2612 sx_xlock(&pmc_sx); \ 2613 if (pmc_hook == NULL) { \ 2614 sx_xunlock(&pmc_sx); \ 2615 return __VA_ARGS__; \ 2616 } \ 2617 } while (0) 2618 2619 #define PMC_DOWNGRADE_SX() do { \ 2620 sx_downgrade(&pmc_sx); \ 2621 is_sx_downgraded = 1; \ 2622 } while (0) 2623 2624 static int 2625 pmc_syscall_handler(struct thread *td, void *syscall_args) 2626 { 2627 int error, is_sx_downgraded, op; 2628 struct pmc_syscall_args *c; 2629 void *arg; 2630 2631 PMC_GET_SX_XLOCK(ENOSYS); 2632 2633 DROP_GIANT(); 2634 2635 is_sx_downgraded = 0; 2636 2637 c = (struct pmc_syscall_args *) syscall_args; 2638 2639 op = c->pmop_code; 2640 arg = c->pmop_data; 2641 2642 PMCDBG(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, 2643 pmc_op_to_name[op], arg); 2644 2645 error = 0; 2646 atomic_add_int(&pmc_stats.pm_syscalls, 1); 2647 2648 switch(op) 2649 { 2650 2651 2652 /* 2653 * Configure a log file. 2654 * 2655 * XXX This OP will be reworked. 2656 */ 2657 2658 case PMC_OP_CONFIGURELOG: 2659 { 2660 struct proc *p; 2661 struct pmc *pm; 2662 struct pmc_owner *po; 2663 struct pmc_op_configurelog cl; 2664 2665 sx_assert(&pmc_sx, SX_XLOCKED); 2666 2667 if ((error = copyin(arg, &cl, sizeof(cl))) != 0) 2668 break; 2669 2670 /* mark this process as owning a log file */ 2671 p = td->td_proc; 2672 if ((po = pmc_find_owner_descriptor(p)) == NULL) 2673 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { 2674 error = ENOMEM; 2675 break; 2676 } 2677 2678 /* 2679 * If a valid fd was passed in, try to configure that, 2680 * otherwise if 'fd' was less than zero and there was 2681 * a log file configured, flush its buffers and 2682 * de-configure it. 2683 */ 2684 if (cl.pm_logfd >= 0) 2685 error = pmclog_configure_log(po, cl.pm_logfd); 2686 else if (po->po_flags & PMC_PO_OWNS_LOGFILE) { 2687 pmclog_process_closelog(po); 2688 error = pmclog_flush(po); 2689 if (error == 0) { 2690 LIST_FOREACH(pm, &po->po_pmcs, pm_next) 2691 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE && 2692 pm->pm_state == PMC_STATE_RUNNING) 2693 pmc_stop(pm); 2694 error = pmclog_deconfigure_log(po); 2695 } 2696 } else 2697 error = EINVAL; 2698 2699 if (error) 2700 break; 2701 } 2702 break; 2703 2704 2705 /* 2706 * Flush a log file. 
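 *
 * The calling process must already be a PMC owner, i.e., it must
 * have an owner descriptor from a prior allocation or log
 * configuration; otherwise the lookup below fails with EINVAL.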
2707 */ 2708 2709 case PMC_OP_FLUSHLOG: 2710 { 2711 struct pmc_owner *po; 2712 2713 sx_assert(&pmc_sx, SX_XLOCKED); 2714 2715 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { 2716 error = EINVAL; 2717 break; 2718 } 2719 2720 error = pmclog_flush(po); 2721 } 2722 break; 2723 2724 /* 2725 * Retrieve hardware configuration. 2726 */ 2727 2728 case PMC_OP_GETCPUINFO: /* CPU information */ 2729 { 2730 struct pmc_op_getcpuinfo gci; 2731 2732 gci.pm_cputype = md->pmd_cputype; 2733 gci.pm_ncpu = pmc_cpu_max(); 2734 gci.pm_npmc = md->pmd_npmc; 2735 gci.pm_nclass = md->pmd_nclass; 2736 bcopy(md->pmd_classes, &gci.pm_classes, 2737 sizeof(gci.pm_classes)); 2738 error = copyout(&gci, arg, sizeof(gci)); 2739 } 2740 break; 2741 2742 2743 /* 2744 * Get module statistics 2745 */ 2746 2747 case PMC_OP_GETDRIVERSTATS: 2748 { 2749 struct pmc_op_getdriverstats gms; 2750 2751 bcopy(&pmc_stats, &gms, sizeof(gms)); 2752 error = copyout(&gms, arg, sizeof(gms)); 2753 } 2754 break; 2755 2756 2757 /* 2758 * Retrieve module version number 2759 */ 2760 2761 case PMC_OP_GETMODULEVERSION: 2762 { 2763 uint32_t cv, modv; 2764 2765 /* retrieve the client's idea of the ABI version */ 2766 if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0) 2767 break; 2768 /* don't service clients newer than our driver */ 2769 modv = PMC_VERSION; 2770 if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) { 2771 error = EPROGMISMATCH; 2772 break; 2773 } 2774 error = copyout(&modv, arg, sizeof(int)); 2775 } 2776 break; 2777 2778 2779 /* 2780 * Retrieve the state of all the PMCs on a given 2781 * CPU. 2782 */ 2783 2784 case PMC_OP_GETPMCINFO: 2785 { 2786 uint32_t cpu, n, npmc; 2787 size_t pmcinfo_size; 2788 struct pmc *pm; 2789 struct pmc_info *p, *pmcinfo; 2790 struct pmc_op_getpmcinfo *gpi; 2791 struct pmc_owner *po; 2792 struct pmc_binding pb; 2793 2794 PMC_DOWNGRADE_SX(); 2795 2796 gpi = (struct pmc_op_getpmcinfo *) arg; 2797 2798 if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) 2799 break; 2800 2801 if (cpu >= pmc_cpu_max()) { 2802 error = EINVAL; 2803 break; 2804 } 2805 2806 if (!pmc_cpu_is_active(cpu)) { 2807 error = ENXIO; 2808 break; 2809 } 2810 2811 /* switch to CPU 'cpu' */ 2812 pmc_save_cpu_binding(&pb); 2813 pmc_select_cpu(cpu); 2814 2815 npmc = md->pmd_npmc; 2816 2817 pmcinfo_size = npmc * sizeof(struct pmc_info); 2818 pmcinfo = malloc(pmcinfo_size, M_PMC, M_WAITOK); 2819 2820 p = pmcinfo; 2821 2822 for (n = 0; n < md->pmd_npmc; n++, p++) { 2823 2824 if ((error = md->pmd_describe(cpu, n, p, &pm)) != 0) 2825 break; 2826 2827 if (PMC_ROW_DISP_IS_STANDALONE(n)) 2828 p->pm_rowdisp = PMC_DISP_STANDALONE; 2829 else if (PMC_ROW_DISP_IS_THREAD(n)) 2830 p->pm_rowdisp = PMC_DISP_THREAD; 2831 else 2832 p->pm_rowdisp = PMC_DISP_FREE; 2833 2834 p->pm_ownerpid = -1; 2835 2836 if (pm == NULL) /* no PMC associated */ 2837 continue; 2838 2839 po = pm->pm_owner; 2840 2841 KASSERT(po->po_owner != NULL, 2842 ("[pmc,%d] pmc_owner had a null proc pointer", 2843 __LINE__)); 2844 2845 p->pm_ownerpid = po->po_owner->p_pid; 2846 p->pm_mode = PMC_TO_MODE(pm); 2847 p->pm_event = pm->pm_event; 2848 p->pm_flags = pm->pm_flags; 2849 2850 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 2851 p->pm_reloadcount = 2852 pm->pm_sc.pm_reloadcount; 2853 } 2854 2855 pmc_restore_cpu_binding(&pb); 2856 2857 /* now copy out the PMC info collected */ 2858 if (error == 0) 2859 error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); 2860 2861 free(pmcinfo, M_PMC); 2862 } 2863 break; 2864 2865 2866 /* 2867 * Set the administrative state of a PMC. I.e. 
whether 2868 * the PMC is to be used or not. 2869 */ 2870 2871 case PMC_OP_PMCADMIN: 2872 { 2873 int cpu, ri; 2874 enum pmc_state request; 2875 struct pmc_cpu *pc; 2876 struct pmc_hw *phw; 2877 struct pmc_op_pmcadmin pma; 2878 struct pmc_binding pb; 2879 2880 sx_assert(&pmc_sx, SX_XLOCKED); 2881 2882 KASSERT(td == curthread, 2883 ("[pmc,%d] td != curthread", __LINE__)); 2884 2885 error = priv_check(td, PRIV_PMC_MANAGE); 2886 if (error) 2887 break; 2888 2889 if ((error = copyin(arg, &pma, sizeof(pma))) != 0) 2890 break; 2891 2892 cpu = pma.pm_cpu; 2893 2894 if (cpu < 0 || cpu >= (int) pmc_cpu_max()) { 2895 error = EINVAL; 2896 break; 2897 } 2898 2899 if (!pmc_cpu_is_active(cpu)) { 2900 error = ENXIO; 2901 break; 2902 } 2903 2904 request = pma.pm_state; 2905 2906 if (request != PMC_STATE_DISABLED && 2907 request != PMC_STATE_FREE) { 2908 error = EINVAL; 2909 break; 2910 } 2911 2912 ri = pma.pm_pmc; /* pmc id == row index */ 2913 if (ri < 0 || ri >= (int) md->pmd_npmc) { 2914 error = EINVAL; 2915 break; 2916 } 2917 2918 /* 2919 * We can't disable a PMC with a row-index allocated 2920 * for process virtual PMCs. 2921 */ 2922 2923 if (PMC_ROW_DISP_IS_THREAD(ri) && 2924 request == PMC_STATE_DISABLED) { 2925 error = EBUSY; 2926 break; 2927 } 2928 2929 /* 2930 * otherwise, this PMC on this CPU is either free or 2931 * in system-wide mode. 2932 */ 2933 2934 pmc_save_cpu_binding(&pb); 2935 pmc_select_cpu(cpu); 2936 2937 pc = pmc_pcpu[cpu]; 2938 phw = pc->pc_hwpmcs[ri]; 2939 2940 /* 2941 * XXX do we need some kind of 'forced' disable? 2942 */ 2943 2944 if (phw->phw_pmc == NULL) { 2945 if (request == PMC_STATE_DISABLED && 2946 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { 2947 phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; 2948 PMC_MARK_ROW_STANDALONE(ri); 2949 } else if (request == PMC_STATE_FREE && 2950 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { 2951 phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; 2952 PMC_UNMARK_ROW_STANDALONE(ri); 2953 } 2954 /* other cases are a no-op */ 2955 } else 2956 error = EBUSY; 2957 2958 pmc_restore_cpu_binding(&pb); 2959 } 2960 break; 2961 2962 2963 /* 2964 * Allocate a PMC. 2965 */ 2966 2967 case PMC_OP_PMCALLOCATE: 2968 { 2969 uint32_t caps; 2970 u_int cpu; 2971 int n; 2972 enum pmc_mode mode; 2973 struct pmc *pmc; 2974 struct pmc_hw *phw; 2975 struct pmc_op_pmcallocate pa; 2976 struct pmc_binding pb; 2977 2978 if ((error = copyin(arg, &pa, sizeof(pa))) != 0) 2979 break; 2980 2981 caps = pa.pm_caps; 2982 mode = pa.pm_mode; 2983 cpu = pa.pm_cpu; 2984 2985 if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && 2986 mode != PMC_MODE_TS && mode != PMC_MODE_TC) || 2987 (cpu != (u_int) PMC_CPU_ANY && cpu >= pmc_cpu_max())) { 2988 error = EINVAL; 2989 break; 2990 } 2991 2992 /* 2993 * Virtual PMCs should only ask for a default CPU. 2994 * System mode PMCs need to specify a non-default CPU. 2995 */ 2996 2997 if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || 2998 (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { 2999 error = EINVAL; 3000 break; 3001 } 3002 3003 /* 3004 * Check that an inactive CPU is not being asked for. 3005 */ 3006 3007 if (PMC_IS_SYSTEM_MODE(mode) && !pmc_cpu_is_active(cpu)) { 3008 error = ENXIO; 3009 break; 3010 } 3011 3012 /* 3013 * Refuse an allocation for a system-wide PMC if this 3014 * process has been jailed, or if this process lacks 3015 * super-user credentials and the sysctl tunable 3016 * 'security.bsd.unprivileged_syspmcs' is zero. 
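 *
 * For example (illustrative, using the tunable named above), an
 * administrator could permit unprivileged system-wide PMCs with:
 *
 *	sysctl security.bsd.unprivileged_syspmcs=1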
3017 */ 3018 3019 if (PMC_IS_SYSTEM_MODE(mode)) { 3020 if (jailed(curthread->td_ucred)) { 3021 error = EPERM; 3022 break; 3023 } 3024 if (!pmc_unprivileged_syspmcs) { 3025 error = priv_check(curthread, 3026 PRIV_PMC_SYSTEM); 3027 if (error) 3028 break; 3029 } 3030 } 3031 3032 if (error) 3033 break; 3034 3035 /* 3036 * Look for valid values for 'pm_flags' 3037 */ 3038 3039 if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | 3040 PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { 3041 error = EINVAL; 3042 break; 3043 } 3044 3045 /* process logging options are not allowed for system PMCs */ 3046 if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags & 3047 (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) { 3048 error = EINVAL; 3049 break; 3050 } 3051 3052 /* 3053 * All sampling mode PMCs need to be able to interrupt the 3054 * CPU. 3055 */ 3056 if (PMC_IS_SAMPLING_MODE(mode)) 3057 caps |= PMC_CAP_INTERRUPT; 3058 3059 /* A valid class specifier should have been passed in. */ 3060 for (n = 0; n < md->pmd_nclass; n++) 3061 if (md->pmd_classes[n].pm_class == pa.pm_class) 3062 break; 3063 if (n == md->pmd_nclass) { 3064 error = EINVAL; 3065 break; 3066 } 3067 3068 /* The requested PMC capabilities should be feasible. */ 3069 if ((md->pmd_classes[n].pm_caps & caps) != caps) { 3070 error = EOPNOTSUPP; 3071 break; 3072 } 3073 3074 PMCDBG(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", 3075 pa.pm_ev, caps, mode, cpu); 3076 3077 pmc = pmc_allocate_pmc_descriptor(); 3078 pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, 3079 PMC_ID_INVALID); 3080 pmc->pm_event = pa.pm_ev; 3081 pmc->pm_state = PMC_STATE_FREE; 3082 pmc->pm_caps = caps; 3083 pmc->pm_flags = pa.pm_flags; 3084 3085 /* switch thread to CPU 'cpu' */ 3086 pmc_save_cpu_binding(&pb); 3087 3088 #define PMC_IS_SHAREABLE_PMC(cpu, n) \ 3089 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ 3090 PMC_PHW_FLAG_IS_SHAREABLE) 3091 #define PMC_IS_UNALLOCATED(cpu, n) \ 3092 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) 3093 3094 if (PMC_IS_SYSTEM_MODE(mode)) { 3095 pmc_select_cpu(cpu); 3096 for (n = 0; n < (int) md->pmd_npmc; n++) 3097 if (pmc_can_allocate_row(n, mode) == 0 && 3098 pmc_can_allocate_rowindex( 3099 curthread->td_proc, n, cpu) == 0 && 3100 (PMC_IS_UNALLOCATED(cpu, n) || 3101 PMC_IS_SHAREABLE_PMC(cpu, n)) && 3102 md->pmd_allocate_pmc(cpu, n, pmc, 3103 &pa) == 0) 3104 break; 3105 } else { 3106 /* Process virtual mode */ 3107 for (n = 0; n < (int) md->pmd_npmc; n++) { 3108 if (pmc_can_allocate_row(n, mode) == 0 && 3109 pmc_can_allocate_rowindex( 3110 curthread->td_proc, n, 3111 PMC_CPU_ANY) == 0 && 3112 md->pmd_allocate_pmc(curthread->td_oncpu, 3113 n, pmc, &pa) == 0) 3114 break; 3115 } 3116 } 3117 3118 #undef PMC_IS_UNALLOCATED 3119 #undef PMC_IS_SHAREABLE_PMC 3120 3121 pmc_restore_cpu_binding(&pb); 3122 3123 if (n == (int) md->pmd_npmc) { 3124 pmc_destroy_pmc_descriptor(pmc); 3125 free(pmc, M_PMC); 3126 pmc = NULL; 3127 error = EINVAL; 3128 break; 3129 } 3130 3131 /* Fill in the correct value in the ID field */ 3132 pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); 3133 3134 PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", 3135 pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); 3136 3137 /* Process mode PMCs with logging enabled need log files */ 3138 if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW)) 3139 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; 3140 3141 /* All system mode sampling PMCs require a log file */ 3142 if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode)) 3143 pmc->pm_flags |= PMC_F_NEEDS_LOGFILE; 3144 
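/*
 * For instance (illustrative): a PMC_MODE_SS sampling allocation
 * always reaches this point with PMC_F_NEEDS_LOGFILE set, so a
 * later PMC_OP_PMCSTART without a configured log file is rejected
 * by pmc_start() with EDOOFUS.
 */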
3145 /* 3146 * Configure global pmc's immediately 3147 */ 3148 3149 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { 3150 3151 pmc_save_cpu_binding(&pb); 3152 pmc_select_cpu(cpu); 3153 3154 phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; 3155 3156 if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || 3157 (error = md->pmd_config_pmc(cpu, n, pmc)) != 0) { 3158 (void) md->pmd_release_pmc(cpu, n, pmc); 3159 pmc_destroy_pmc_descriptor(pmc); 3160 free(pmc, M_PMC); 3161 pmc = NULL; 3162 pmc_restore_cpu_binding(&pb); 3163 error = EPERM; 3164 break; 3165 } 3166 3167 pmc_restore_cpu_binding(&pb); 3168 } 3169 3170 pmc->pm_state = PMC_STATE_ALLOCATED; 3171 3172 /* 3173 * mark row disposition 3174 */ 3175 3176 if (PMC_IS_SYSTEM_MODE(mode)) 3177 PMC_MARK_ROW_STANDALONE(n); 3178 else 3179 PMC_MARK_ROW_THREAD(n); 3180 3181 /* 3182 * Register this PMC with the current thread as its owner. 3183 */ 3184 3185 if ((error = 3186 pmc_register_owner(curthread->td_proc, pmc)) != 0) { 3187 pmc_release_pmc_descriptor(pmc); 3188 free(pmc, M_PMC); 3189 pmc = NULL; 3190 break; 3191 } 3192 3193 /* 3194 * Return the allocated index. 3195 */ 3196 3197 pa.pm_pmcid = pmc->pm_id; 3198 3199 error = copyout(&pa, arg, sizeof(pa)); 3200 } 3201 break; 3202 3203 3204 /* 3205 * Attach a PMC to a process. 3206 */ 3207 3208 case PMC_OP_PMCATTACH: 3209 { 3210 struct pmc *pm; 3211 struct proc *p; 3212 struct pmc_op_pmcattach a; 3213 3214 sx_assert(&pmc_sx, SX_XLOCKED); 3215 3216 if ((error = copyin(arg, &a, sizeof(a))) != 0) 3217 break; 3218 3219 if (a.pm_pid < 0) { 3220 error = EINVAL; 3221 break; 3222 } else if (a.pm_pid == 0) 3223 a.pm_pid = td->td_proc->p_pid; 3224 3225 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 3226 break; 3227 3228 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 3229 error = EINVAL; 3230 break; 3231 } 3232 3233 /* PMCs may be (re)attached only when allocated or stopped */ 3234 if (pm->pm_state == PMC_STATE_RUNNING) { 3235 error = EBUSY; 3236 break; 3237 } else if (pm->pm_state != PMC_STATE_ALLOCATED && 3238 pm->pm_state != PMC_STATE_STOPPED) { 3239 error = EINVAL; 3240 break; 3241 } 3242 3243 /* lookup pid */ 3244 if ((p = pfind(a.pm_pid)) == NULL) { 3245 error = ESRCH; 3246 break; 3247 } 3248 3249 /* 3250 * Ignore processes that are working on exiting. 3251 */ 3252 if (p->p_flag & P_WEXIT) { 3253 error = ESRCH; 3254 PROC_UNLOCK(p); /* pfind() returns a locked process */ 3255 break; 3256 } 3257 3258 /* 3259 * we are allowed to attach a PMC to a process if 3260 * we can debug it. 3261 */ 3262 error = p_candebug(curthread, p); 3263 3264 PROC_UNLOCK(p); 3265 3266 if (error == 0) 3267 error = pmc_attach_process(p, pm); 3268 } 3269 break; 3270 3271 3272 /* 3273 * Detach an attached PMC from a process. 3274 */ 3275 3276 case PMC_OP_PMCDETACH: 3277 { 3278 struct pmc *pm; 3279 struct proc *p; 3280 struct pmc_op_pmcattach a; 3281 3282 if ((error = copyin(arg, &a, sizeof(a))) != 0) 3283 break; 3284 3285 if (a.pm_pid < 0) { 3286 error = EINVAL; 3287 break; 3288 } else if (a.pm_pid == 0) 3289 a.pm_pid = td->td_proc->p_pid; 3290 3291 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 3292 break; 3293 3294 if ((p = pfind(a.pm_pid)) == NULL) { 3295 error = ESRCH; 3296 break; 3297 } 3298 3299 /* 3300 * Treat processes that are in the process of exiting 3301 * as if they were not present. 
3302 */ 3303 3304 if (p->p_flag & P_WEXIT) 3305 error = ESRCH; 3306 3307 PROC_UNLOCK(p); /* pfind() returns a locked process */ 3308 3309 if (error == 0) 3310 error = pmc_detach_process(p, pm); 3311 } 3312 break; 3313 3314 3315 /* 3316 * Retrieve the MSR number associated with the counter 3317 * 'pmc_id'. This allows processes to directly use RDPMC 3318 * instructions to read their PMCs, without the overhead of a 3319 * system call. 3320 */ 3321 3322 case PMC_OP_PMCGETMSR: 3323 { 3324 int ri; 3325 struct pmc *pm; 3326 struct pmc_target *pt; 3327 struct pmc_op_getmsr gm; 3328 3329 PMC_DOWNGRADE_SX(); 3330 3331 /* CPU has no 'GETMSR' support */ 3332 if (md->pmd_get_msr == NULL) { 3333 error = ENOSYS; 3334 break; 3335 } 3336 3337 if ((error = copyin(arg, &gm, sizeof(gm))) != 0) 3338 break; 3339 3340 if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) 3341 break; 3342 3343 /* 3344 * The allocated PMC has to be a process virtual PMC, 3345 * i.e., of type MODE_T[CS]. Global PMCs can only be 3346 * read using the PMCREAD operation since they may be 3347 * allocated on a different CPU than the one we could 3348 * be running on at the time of the RDPMC instruction. 3349 * 3350 * The GETMSR operation is not allowed for PMCs that 3351 * are inherited across processes. 3352 */ 3353 3354 if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || 3355 (pm->pm_flags & PMC_F_DESCENDANTS)) { 3356 error = EINVAL; 3357 break; 3358 } 3359 3360 /* 3361 * It only makes sense to use a RDPMC (or its 3362 * equivalent instruction on non-x86 architectures) on 3363 * a process that has allocated and attached a PMC to 3364 * itself. Conversely the PMC is only allowed to have 3365 * one process attached to it -- its owner. 3366 */ 3367 3368 if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || 3369 LIST_NEXT(pt, pt_next) != NULL || 3370 pt->pt_process->pp_proc != pm->pm_owner->po_owner) { 3371 error = EINVAL; 3372 break; 3373 } 3374 3375 ri = PMC_TO_ROWINDEX(pm); 3376 3377 if ((error = (*md->pmd_get_msr)(ri, &gm.pm_msr)) < 0) 3378 break; 3379 3380 if ((error = copyout(&gm, arg, sizeof(gm))) < 0) 3381 break; 3382 3383 /* 3384 * Mark our process as using MSRs. Update machine 3385 * state using a forced context switch. 3386 */ 3387 3388 pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; 3389 pmc_force_context_switch(); 3390 3391 } 3392 break; 3393 3394 /* 3395 * Release an allocated PMC 3396 */ 3397 3398 case PMC_OP_PMCRELEASE: 3399 { 3400 pmc_id_t pmcid; 3401 struct pmc *pm; 3402 struct pmc_owner *po; 3403 struct pmc_op_simple sp; 3404 3405 /* 3406 * Find PMC pointer for the named PMC. 3407 * 3408 * Use pmc_release_pmc_descriptor() to switch off the 3409 * PMC, remove all its target threads, and remove the 3410 * PMC from its owner's list. 3411 * 3412 * Remove the owner record if this is the last PMC 3413 * owned. 3414 * 3415 * Free up space. 3416 */ 3417 3418 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3419 break; 3420 3421 pmcid = sp.pm_pmcid; 3422 3423 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3424 break; 3425 3426 po = pm->pm_owner; 3427 pmc_release_pmc_descriptor(pm); 3428 pmc_maybe_remove_owner(po); 3429 3430 free(pm, M_PMC); 3431 } 3432 break; 3433 3434 3435 /* 3436 * Read and/or write a PMC. 
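 *
 * An illustrative request, using the same structure fields the
 * handler below operates on:
 *
 *	struct pmc_op_pmcrw prw;
 *
 *	prw.pm_pmcid = pmcid;
 *	prw.pm_flags = PMC_F_OLDVALUE;
 *	prw.pm_value = 0;
 *
 * PMC_F_OLDVALUE returns the current count in 'pm_value', while
 * PMC_F_NEWVALUE writes 'pm_value' and is only legal while the PMC
 * is stopped.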
3437 */
3438
3439 case PMC_OP_PMCRW:
3440 {
3441 uint32_t cpu, ri;
3442 struct pmc *pm;
3443 struct pmc_op_pmcrw *pprw;
3444 struct pmc_op_pmcrw prw;
3445 struct pmc_binding pb;
3446 pmc_value_t oldvalue;
3447
3448 PMC_DOWNGRADE_SX();
3449
3450 if ((error = copyin(arg, &prw, sizeof(prw))) != 0)
3451 break;
3452
3453 ri = 0;
3454 PMCDBG(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid,
3455 prw.pm_flags);
3456
3457 /* must have at least one flag set */
3458 if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) {
3459 error = EINVAL;
3460 break;
3461 }
3462
3463 /* locate pmc descriptor */
3464 if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0)
3465 break;
3466
3467 /* Can't read a PMC that hasn't been started. */
3468 if (pm->pm_state != PMC_STATE_ALLOCATED &&
3469 pm->pm_state != PMC_STATE_STOPPED &&
3470 pm->pm_state != PMC_STATE_RUNNING) {
3471 error = EINVAL;
3472 break;
3473 }
3474
3475 /* writing a new value is allowed only for 'STOPPED' pmcs */
3476 if (pm->pm_state == PMC_STATE_RUNNING &&
3477 (prw.pm_flags & PMC_F_NEWVALUE)) {
3478 error = EBUSY;
3479 break;
3480 }
3481
3482 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
3483
3484 /*
3485 * If this PMC is attached to its owner (i.e.,
3486 * the process requesting this operation) and
3487 * is running, then attempt to get an
3488 * up-to-date reading from hardware for a READ.
3489 * Writes are only allowed when the PMC is
3490 * stopped, so only update the saved value
3491 * field.
3492 *
3493 * If the PMC is not running, or is not
3494 * attached to its owner, read/write to the
3495 * savedvalue field.
3496 */
3497
3498 ri = PMC_TO_ROWINDEX(pm);
3499
3500 mtx_pool_lock_spin(pmc_mtxpool, pm);
3501 cpu = curthread->td_oncpu;
3502
3503 if (prw.pm_flags & PMC_F_OLDVALUE) {
3504 if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) &&
3505 (pm->pm_state == PMC_STATE_RUNNING))
3506 error = (*md->pmd_read_pmc)(cpu, ri,
3507 &oldvalue);
3508 else
3509 oldvalue = pm->pm_gv.pm_savedvalue;
3510 }
3511 if (prw.pm_flags & PMC_F_NEWVALUE)
3512 pm->pm_gv.pm_savedvalue = prw.pm_value;
3513
3514 mtx_pool_unlock_spin(pmc_mtxpool, pm);
3515
3516 } else { /* System mode PMCs */
3517 cpu = PMC_TO_CPU(pm);
3518 ri = PMC_TO_ROWINDEX(pm);
3519
3520 if (!pmc_cpu_is_active(cpu)) {
3521 error = ENXIO;
3522 break;
3523 }
3524
3525 /* move this thread to CPU 'cpu' */
3526 pmc_save_cpu_binding(&pb);
3527 pmc_select_cpu(cpu);
3528
3529 critical_enter();
3530 /* save old value */
3531 if (prw.pm_flags & PMC_F_OLDVALUE)
3532 if ((error = (*md->pmd_read_pmc)(cpu, ri,
3533 &oldvalue)))
3534 goto error;
3535 /* write out new value */
3536 if (prw.pm_flags & PMC_F_NEWVALUE)
3537 error = (*md->pmd_write_pmc)(cpu, ri,
3538 prw.pm_value);
3539 error:
3540 critical_exit();
3541 pmc_restore_cpu_binding(&pb);
3542 if (error)
3543 break;
3544 }
3545
3546 pprw = (struct pmc_op_pmcrw *) arg;
3547
3548 #ifdef DEBUG
3549 if (prw.pm_flags & PMC_F_NEWVALUE)
3550 PMCDBG(PMC,OPS,2, "rw id=%d new %jx -> old %jx",
3551 ri, prw.pm_value, oldvalue);
3552 else if (prw.pm_flags & PMC_F_OLDVALUE)
3553 PMCDBG(PMC,OPS,2, "rw id=%d -> old %jx", ri, oldvalue);
3554 #endif
3555
3556 /* return old value if requested */
3557 if (prw.pm_flags & PMC_F_OLDVALUE)
3558 if ((error = copyout(&oldvalue, &pprw->pm_value,
3559 sizeof(prw.pm_value))))
3560 break;
3561
3562 }
3563 break;
3564
3565
3566 /*
3567 * Set the sampling rate for a sampling mode PMC and the
3568 * initial count for a counting mode PMC.
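 *
 * The single 'pm_count' field is interpreted according to the
 * PMC's mode: for sampling PMCs it becomes pm_sc.pm_reloadcount
 * (one interrupt every 'pm_count' events), while for counting PMCs
 * it becomes pm_sc.pm_initial, the value loaded into the hardware
 * at PMCSTART time.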
3569 */ 3570 3571 case PMC_OP_PMCSETCOUNT: 3572 { 3573 struct pmc *pm; 3574 struct pmc_op_pmcsetcount sc; 3575 3576 PMC_DOWNGRADE_SX(); 3577 3578 if ((error = copyin(arg, &sc, sizeof(sc))) != 0) 3579 break; 3580 3581 if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) 3582 break; 3583 3584 if (pm->pm_state == PMC_STATE_RUNNING) { 3585 error = EBUSY; 3586 break; 3587 } 3588 3589 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 3590 pm->pm_sc.pm_reloadcount = sc.pm_count; 3591 else 3592 pm->pm_sc.pm_initial = sc.pm_count; 3593 } 3594 break; 3595 3596 3597 /* 3598 * Start a PMC. 3599 */ 3600 3601 case PMC_OP_PMCSTART: 3602 { 3603 pmc_id_t pmcid; 3604 struct pmc *pm; 3605 struct pmc_op_simple sp; 3606 3607 sx_assert(&pmc_sx, SX_XLOCKED); 3608 3609 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3610 break; 3611 3612 pmcid = sp.pm_pmcid; 3613 3614 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3615 break; 3616 3617 KASSERT(pmcid == pm->pm_id, 3618 ("[pmc,%d] pmcid %x != id %x", __LINE__, 3619 pm->pm_id, pmcid)); 3620 3621 if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ 3622 break; 3623 else if (pm->pm_state != PMC_STATE_STOPPED && 3624 pm->pm_state != PMC_STATE_ALLOCATED) { 3625 error = EINVAL; 3626 break; 3627 } 3628 3629 error = pmc_start(pm); 3630 } 3631 break; 3632 3633 3634 /* 3635 * Stop a PMC. 3636 */ 3637 3638 case PMC_OP_PMCSTOP: 3639 { 3640 pmc_id_t pmcid; 3641 struct pmc *pm; 3642 struct pmc_op_simple sp; 3643 3644 PMC_DOWNGRADE_SX(); 3645 3646 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3647 break; 3648 3649 pmcid = sp.pm_pmcid; 3650 3651 /* 3652 * Mark the PMC as inactive and invoke the MD stop 3653 * routines if needed. 3654 */ 3655 3656 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3657 break; 3658 3659 KASSERT(pmcid == pm->pm_id, 3660 ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, 3661 pm->pm_id, pmcid)); 3662 3663 if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ 3664 break; 3665 else if (pm->pm_state != PMC_STATE_RUNNING) { 3666 error = EINVAL; 3667 break; 3668 } 3669 3670 error = pmc_stop(pm); 3671 } 3672 break; 3673 3674 3675 /* 3676 * Write a user supplied value to the log file. 3677 */ 3678 3679 case PMC_OP_WRITELOG: 3680 { 3681 struct pmc_op_writelog wl; 3682 struct pmc_owner *po; 3683 3684 PMC_DOWNGRADE_SX(); 3685 3686 if ((error = copyin(arg, &wl, sizeof(wl))) != 0) 3687 break; 3688 3689 if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) { 3690 error = EINVAL; 3691 break; 3692 } 3693 3694 if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) { 3695 error = EINVAL; 3696 break; 3697 } 3698 3699 error = pmclog_process_userlog(po, &wl); 3700 } 3701 break; 3702 3703 3704 default: 3705 error = EINVAL; 3706 break; 3707 } 3708 3709 if (is_sx_downgraded) 3710 sx_sunlock(&pmc_sx); 3711 else 3712 sx_xunlock(&pmc_sx); 3713 3714 if (error) 3715 atomic_add_int(&pmc_stats.pm_syscall_errors, 1); 3716 3717 PICKUP_GIANT(); 3718 3719 return error; 3720 } 3721 3722 /* 3723 * Helper functions 3724 */ 3725 3726 3727 /* 3728 * Mark the thread as needing callchain capture and post an AST. The 3729 * actual callchain capture will be done in a context where it is safe 3730 * to take page faults. 3731 */ 3732 3733 static void 3734 pmc_post_callchain_ast(void) 3735 { 3736 struct thread *td; 3737 3738 td = curthread; 3739 3740 /* 3741 * Mark this thread as needing processing in ast(). 3742 * td->td_pflags will be safe to touch as the process was in 3743 * user space when it was interrupted. 
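 *
 * The AST side of this handshake is pmc_capture_user_callchain()
 * below, which performs the deferred user-space stack walk.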
3744 */
3745 td->td_pflags |= TDP_CALLCHAIN;
3746
3747 /*
3748 * Again, since we've entered this function directly from
3749 * userland, `td' is guaranteed to be not locked by this CPU,
3750 * so it's safe to try to acquire the thread lock even though we
3751 * are executing in an NMI context. We need to acquire this
3752 * lock before touching `td_flags' because other CPUs may be
3753 * in the process of touching this field.
3754 */
3755 thread_lock(td);
3756 td->td_flags |= TDF_ASTPENDING;
3757 thread_unlock(td);
3758
3759 return;
3760 }
3761
3762 /*
3763 * Interrupt processing.
3764 *
3765 * Find a free slot in the per-cpu array of samples and capture the
3766 * current callchain there. If a sample was successfully added, a bit
3767 * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
3768 * needs to be invoked from the clock handler.
3769 *
3770 * This function is meant to be called from an NMI handler. It cannot
3771 * use any of the locking primitives supplied by the OS.
3772 */
3773
3774 int
3775 pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
3776 int inuserspace)
3777 {
3778 int error, callchaindepth;
3779 struct thread *td;
3780 struct pmc_sample *ps;
3781 struct pmc_samplebuffer *psb;
3782
3783 error = 0;
3784
3785 /*
3786 * Locate this CPU's sample buffer.
3787 */
3788 psb = pmc_pcpu[cpu]->pc_sb;
3789
3790 ps = psb->ps_write;
3791 if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
3792 pm->pm_stalled = 1;
3793 atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
3794 PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
3795 cpu, pm, (void *) tf, inuserspace,
3796 (int) (psb->ps_write - psb->ps_samples),
3797 (int) (psb->ps_read - psb->ps_samples));
3798 error = ENOMEM;
3799 goto done;
3800 }
3801
3802
3803 /* Fill in entry. */
3804 PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
3805 (void *) tf, inuserspace,
3806 (int) (psb->ps_write - psb->ps_samples),
3807 (int) (psb->ps_read - psb->ps_samples));
3808
3809 atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
3810 ps->ps_pmc = pm;
3811 if ((td = curthread) && td->td_proc)
3812 ps->ps_pid = td->td_proc->p_pid;
3813 else
3814 ps->ps_pid = -1;
3815 ps->ps_cpu = cpu;
3816 ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
3817
3818 callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
3819 pmc_callchaindepth : 1;
3820
3821 if (callchaindepth == 1)
3822 ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
3823 else {
3824 /*
3825 * Kernel stack traversals can be done immediately,
3826 * while we defer to an AST for user space traversals.
3827 */
3828 if (!inuserspace)
3829 callchaindepth =
3830 pmc_save_kernel_callchain(ps->ps_pc,
3831 callchaindepth, tf);
3832 else {
3833 pmc_post_callchain_ast();
3834 callchaindepth = PMC_SAMPLE_INUSE;
3835 }
3836 }
3837
3838 ps->ps_nsamples = callchaindepth; /* mark entry as in use */
3839
3840 /* increment write pointer, modulo ring buffer size */
3841 ps++;
3842 if (ps == psb->ps_fence)
3843 psb->ps_write = psb->ps_samples;
3844 else
3845 psb->ps_write = ps;
3846
3847 done:
3848 /* mark CPU as needing processing */
3849 atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
3850
3851 return (error);
3852 }
3853
3854 /*
3855 * Capture a user call chain. This function will be called from ast()
3856 * before control returns to userland and before the process gets
3857 * rescheduled.
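 *
 * The samples to service are those parked by pmc_process_interrupt()
 * with 'ps_nsamples' set to the PMC_SAMPLE_INUSE marker; the loop
 * below replaces that marker with the real callchain depth, making
 * the entry processable by pmc_process_samples().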
3858 */ 3859 3860 static void 3861 pmc_capture_user_callchain(int cpu, struct trapframe *tf) 3862 { 3863 int i; 3864 struct pmc *pm; 3865 struct pmc_sample *ps; 3866 struct pmc_samplebuffer *psb; 3867 3868 psb = pmc_pcpu[cpu]->pc_sb; 3869 3870 /* 3871 * Iterate through all deferred callchain requests. 3872 */ 3873 3874 for (i = 0; i < pmc_nsamples; i++) { 3875 3876 ps = &psb->ps_samples[i]; 3877 if (ps->ps_nsamples != PMC_SAMPLE_INUSE) 3878 continue; 3879 3880 pm = ps->ps_pmc; 3881 3882 KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, 3883 ("[pmc,%d] Retrieving callchain for PMC that doesn't " 3884 "want it", __LINE__)); 3885 3886 /* 3887 * Retrieve the callchain and mark the sample buffer 3888 * as 'processable' by the timer tick sweep code. 3889 */ 3890 ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, 3891 pmc_callchaindepth, tf); 3892 } 3893 3894 return; 3895 } 3896 3897 3898 /* 3899 * Process saved PC samples. 3900 */ 3901 3902 static void 3903 pmc_process_samples(int cpu) 3904 { 3905 int n, ri; 3906 struct pmc *pm; 3907 struct thread *td; 3908 struct pmc_owner *po; 3909 struct pmc_sample *ps; 3910 struct pmc_samplebuffer *psb; 3911 3912 KASSERT(PCPU_GET(cpuid) == cpu, 3913 ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__, 3914 PCPU_GET(cpuid), cpu)); 3915 3916 psb = pmc_pcpu[cpu]->pc_sb; 3917 3918 for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ 3919 3920 ps = psb->ps_read; 3921 if (ps->ps_nsamples == PMC_SAMPLE_FREE) 3922 break; 3923 if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { 3924 /* Need a rescan at a later time. */ 3925 atomic_set_rel_int(&pmc_cpumask, (1 << cpu)); 3926 break; 3927 } 3928 3929 pm = ps->ps_pmc; 3930 po = pm->pm_owner; 3931 3932 KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), 3933 ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__, 3934 pm, PMC_TO_MODE(pm))); 3935 3936 /* Ignore PMCs that have been switched off */ 3937 if (pm->pm_state != PMC_STATE_RUNNING) 3938 goto entrydone; 3939 3940 PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, 3941 pm, ps->ps_nsamples, ps->ps_flags, 3942 (int) (psb->ps_write - psb->ps_samples), 3943 (int) (psb->ps_read - psb->ps_samples)); 3944 3945 /* 3946 * If this is a process-mode PMC that is attached to 3947 * its owner, and if the PC is in user mode, update 3948 * profiling statistics like timer-based profiling 3949 * would have done. 3950 */ 3951 if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { 3952 if (ps->ps_flags & PMC_CC_F_USERSPACE) { 3953 td = FIRST_THREAD_IN_PROC(po->po_owner); 3954 addupc_intr(td, ps->ps_pc[0], 1); 3955 } 3956 goto entrydone; 3957 } 3958 3959 /* 3960 * Otherwise, this is either a sampling mode PMC that 3961 * is attached to a different process than its owner, 3962 * or a system-wide sampling PMC. Dispatch a log 3963 * entry to the PMC's owner process. 3964 */ 3965 3966 pmclog_process_callchain(pm, ps); 3967 3968 entrydone: 3969 ps->ps_nsamples = 0; /* mark entry as free */ 3970 atomic_subtract_rel_32(&pm->pm_runcount, 1); 3971 3972 /* increment read pointer, modulo sample size */ 3973 if (++ps == psb->ps_fence) 3974 psb->ps_read = psb->ps_samples; 3975 else 3976 psb->ps_read = ps; 3977 } 3978 3979 atomic_add_int(&pmc_stats.pm_log_sweeps, 1); 3980 3981 /* Do not re-enable stalled PMCs if we failed to process any samples */ 3982 if (n == 0) 3983 return; 3984 3985 /* 3986 * Restart any stalled sampling PMCs on this CPU. 
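 *
 * (A PMC stalls when pmc_process_interrupt() finds the per-CPU
 * sample ring full and sets 'pm_stalled'; clearing that flag and
 * restarting the hardware here resumes sampling once samples have
 * been drained.)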
3987 *
3988 * If the NMI handler sets the pm_stalled field of a PMC after
3989 * the check below, we'll end up processing the stalled PMC at
3990 * the next hardclock tick.
3991 */
3992 for (n = 0; n < md->pmd_npmc; n++) {
3993 (void) (*md->pmd_get_config)(cpu,n,&pm);
3994 if (pm == NULL || /* !cfg'ed */
3995 pm->pm_state != PMC_STATE_RUNNING || /* !active */
3996 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
3997 pm->pm_stalled == 0) /* !stalled */
3998 continue;
3999
4000 pm->pm_stalled = 0;
4001 ri = PMC_TO_ROWINDEX(pm);
4002 (*md->pmd_start_pmc)(cpu, ri);
4003 }
4004 }
4005
4006 /*
4007 * Event handlers.
4008 */
4009
4010 /*
4011 * Handle a process exit.
4012 *
4013 * Remove this process from all hash tables. If this process
4014 * owned any PMCs, turn off those PMCs and deallocate them,
4015 * removing any associations with target processes.
4016 *
4017 * This function will be called by the last 'thread' of a
4018 * process.
4019 *
4020 * XXX This eventhandler gets called early in the exit process.
4021 * Consider using a 'hook' invocation from thread_exit() or equivalent
4022 * spot. Another negative is that kse_exit doesn't seem to call
4023 * exit1() [??].
4024 *
4025 */
4026
4027 static void
4028 pmc_process_exit(void *arg __unused, struct proc *p)
4029 {
4030 int is_using_hwpmcs;
4031 int cpu;
4032 unsigned int ri;
4033 struct pmc *pm;
4034 struct pmc_process *pp;
4035 struct pmc_owner *po;
4036 pmc_value_t newvalue, tmp;
4037
4038 PROC_LOCK(p);
4039 is_using_hwpmcs = p->p_flag & P_HWPMC;
4040 PROC_UNLOCK(p);
4041
4042 /*
4043 * Log a sysexit event to all SS PMC owners.
4044 */
4045 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
4046 if (po->po_flags & PMC_PO_OWNS_LOGFILE)
4047 pmclog_process_sysexit(po, p->p_pid);
4048
4049 if (!is_using_hwpmcs)
4050 return;
4051
4052 PMC_GET_SX_XLOCK();
4053 PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid,
4054 p->p_comm);
4055
4056 /*
4057 * Since this code is invoked by the last thread in an exiting
4058 * process, we would have context switched IN at some prior
4059 * point. However, with PREEMPTION, kernel mode context
4060 * switches may happen any time, so we want to disable a
4061 * context switch OUT till we get any PMCs targeting this
4062 * process off the hardware.
4063 *
4064 * We also need to atomically remove this process'
4065 * entry from our target process hash table, using
4066 * PMC_FLAG_REMOVE.
4067 */
4068 PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid,
4069 p->p_comm);
4070
4071 critical_enter(); /* no preemption */
4072
4073 cpu = curthread->td_oncpu;
4074
4075 if ((pp = pmc_find_process_descriptor(p,
4076 PMC_FLAG_REMOVE)) != NULL) {
4077
4078 PMCDBG(PRC,EXT,2,
4079 "process-exit proc=%p pmc-process=%p", p, pp);
4080
4081 /*
4082 * The exiting process could be the target of
4083 * some PMCs which will be running on the
4084 * currently executing CPU.
4085 *
4086 * We need to turn these PMCs off like we
4087 * would do at context switch OUT time.
4088 */
4089 for (ri = 0; ri < md->pmd_npmc; ri++) {
4090
4091 /*
4092 * Pick up the pmc pointer from hardware
4093 * state similar to the CSW_OUT code.
4094 */
4095 pm = NULL;
4096 (void) (*md->pmd_get_config)(cpu, ri, &pm);
4097
4098 PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
4099
4100 if (pm == NULL ||
4101 !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
4102 continue;
4103
4104 PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
4105 "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
4106 pm, pm->pm_state);
4107
4108 KASSERT(PMC_TO_ROWINDEX(pm) == ri,
4109 ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
4110 __LINE__, PMC_TO_ROWINDEX(pm), ri));
4111
4112 KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
4113 ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
4114 __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc));
4115
4116 (void) md->pmd_stop_pmc(cpu, ri);
4117
4118 KASSERT(pm->pm_runcount > 0,
4119 ("[pmc,%d] bad runcount ri %d rc %d",
4120 __LINE__, ri, pm->pm_runcount));
4121
4122 /* Read the count only if the PMC was actually running */
4123 if (pm->pm_state == PMC_STATE_RUNNING &&
4124 pm->pm_stalled == 0) {
4125 md->pmd_read_pmc(cpu, ri, &newvalue);
4126 tmp = newvalue -
4127 PMC_PCPU_SAVED(cpu,ri);
4128
4129 mtx_pool_lock_spin(pmc_mtxpool, pm);
4130 pm->pm_gv.pm_savedvalue += tmp;
4131 pp->pp_pmcs[ri].pp_pmcval += tmp;
4132 mtx_pool_unlock_spin(pmc_mtxpool, pm);
4133 }
4134
4135 atomic_subtract_rel_32(&pm->pm_runcount,1);
4136
4137 KASSERT((int) pm->pm_runcount >= 0,
4138 ("[pmc,%d] runcount is %d", __LINE__,
4139 (int) pm->pm_runcount));
4140 (void) md->pmd_config_pmc(cpu, ri, NULL);
4141 }
4142
4143 /*
4144 * Inform the MD layer of this pseudo "context switch
4145 * out".
4146 */
4147 (void) md->pmd_switch_out(pmc_pcpu[cpu], pp);
4148
4149 critical_exit(); /* ok to be pre-empted now */
4150
4151 /*
4152 * Unlink this process from the PMCs that are
4153 * targeting it. This will send a signal to
4154 * all PMC owners whose PMCs are orphaned.
4155 *
4156 * Log PMC value at exit time if requested.
4157 */
4158 for (ri = 0; ri < md->pmd_npmc; ri++)
4159 if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
4160 if (pm->pm_flags & PMC_F_NEEDS_LOGFILE &&
4161 PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)))
4162 pmclog_process_procexit(pm, pp);
4163 pmc_unlink_target_process(pm, pp);
4164 }
4165 free(pp, M_PMC);
4166
4167 } else
4168 critical_exit(); /* pp == NULL */
4169
4170
4171 /*
4172 * If the process owned PMCs, free them up and free up
4173 * memory.
4174 */
4175 if ((po = pmc_find_owner_descriptor(p)) != NULL) {
4176 pmc_remove_owner(po);
4177 pmc_destroy_owner_descriptor(po);
4178 }
4179
4180 sx_xunlock(&pmc_sx);
4181 }
4182
4183 /*
4184 * Handle a process fork.
4185 *
4186 * If the parent process 'p1' is under HWPMC monitoring, then copy
4187 * over any attached PMCs that have 'do_descendants' semantics.
4188 */
4189
4190 static void
4191 pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc,
4192 int flags)
4193 {
4194 int is_using_hwpmcs;
4195 unsigned int ri;
4196 uint32_t do_descendants;
4197 struct pmc *pm;
4198 struct pmc_owner *po;
4199 struct pmc_process *ppnew, *ppold;
4200
4201 (void) flags; /* unused parameter */
4202
4203 PROC_LOCK(p1);
4204 is_using_hwpmcs = p1->p_flag & P_HWPMC;
4205 PROC_UNLOCK(p1);
4206
4207 /*
4208 * If there are system-wide sampling PMCs active, we need to
4209 * log all fork events to their owners' logs.
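 *
 * Note that this logging runs before the P_HWPMC check below: fork
 * events are of interest to system-wide sampling owners even when
 * neither the parent nor the child is itself a PMC target.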
4210 */ 4211 4212 LIST_FOREACH(po, &pmc_ss_owners, po_ssnext) 4213 if (po->po_flags & PMC_PO_OWNS_LOGFILE) 4214 pmclog_process_procfork(po, p1->p_pid, newproc->p_pid); 4215 4216 if (!is_using_hwpmcs) 4217 return; 4218 4219 PMC_GET_SX_XLOCK(); 4220 PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1, 4221 p1->p_pid, p1->p_comm, newproc); 4222 4223 /* 4224 * If the parent process (curthread->td_proc) is a 4225 * target of any PMCs, look for PMCs that are to be 4226 * inherited, and link these into the new process 4227 * descriptor. 4228 */ 4229 if ((ppold = pmc_find_process_descriptor(curthread->td_proc, 4230 PMC_FLAG_NONE)) == NULL) 4231 goto done; /* nothing to do */ 4232 4233 do_descendants = 0; 4234 for (ri = 0; ri < md->pmd_npmc; ri++) 4235 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL) 4236 do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS; 4237 if (do_descendants == 0) /* nothing to do */ 4238 goto done; 4239 4240 /* allocate a descriptor for the new process */ 4241 if ((ppnew = pmc_find_process_descriptor(newproc, 4242 PMC_FLAG_ALLOCATE)) == NULL) 4243 goto done; 4244 4245 /* 4246 * Run through all PMCs that were targeting the old process 4247 * and which specified F_DESCENDANTS and attach them to the 4248 * new process. 4249 * 4250 * Log the fork event to all owners of PMCs attached to this 4251 * process, if not already logged. 4252 */ 4253 for (ri = 0; ri < md->pmd_npmc; ri++) 4254 if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL && 4255 (pm->pm_flags & PMC_F_DESCENDANTS)) { 4256 pmc_link_target_process(pm, ppnew); 4257 po = pm->pm_owner; 4258 if (po->po_sscount == 0 && 4259 po->po_flags & PMC_PO_OWNS_LOGFILE) 4260 pmclog_process_procfork(po, p1->p_pid, 4261 newproc->p_pid); 4262 } 4263 4264 /* 4265 * Now mark the new process as being tracked by this driver. 
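 *
 * Setting P_HWPMC is what routes the child through the driver's
 * hooks from now on; compare the P_HWPMC tests at the top of
 * pmc_process_exit() and of this function.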
4266 */ 4267 PROC_LOCK(newproc); 4268 newproc->p_flag |= P_HWPMC; 4269 PROC_UNLOCK(newproc); 4270 4271 done: 4272 sx_xunlock(&pmc_sx); 4273 } 4274 4275 4276 /* 4277 * initialization 4278 */ 4279 4280 static const char *pmc_name_of_pmcclass[] = { 4281 #undef __PMC_CLASS 4282 #define __PMC_CLASS(N) #N , 4283 __PMC_CLASSES() 4284 }; 4285 4286 static int 4287 pmc_initialize(void) 4288 { 4289 int cpu, error, n; 4290 unsigned int maxcpu; 4291 struct pmc_binding pb; 4292 struct pmc_sample *ps; 4293 struct pmc_samplebuffer *sb; 4294 4295 md = NULL; 4296 error = 0; 4297 4298 #ifdef DEBUG 4299 /* parse debug flags first */ 4300 if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", 4301 pmc_debugstr, sizeof(pmc_debugstr))) 4302 pmc_debugflags_parse(pmc_debugstr, 4303 pmc_debugstr+strlen(pmc_debugstr)); 4304 #endif 4305 4306 PMCDBG(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); 4307 4308 /* check kernel version */ 4309 if (pmc_kernel_version != PMC_VERSION) { 4310 if (pmc_kernel_version == 0) 4311 printf("hwpmc: this kernel has not been compiled with " 4312 "'options HWPMC_HOOKS'.\n"); 4313 else 4314 printf("hwpmc: kernel version (0x%x) does not match " 4315 "module version (0x%x).\n", pmc_kernel_version, 4316 PMC_VERSION); 4317 return EPROGMISMATCH; 4318 } 4319 4320 /* 4321 * check sysctl parameters 4322 */ 4323 4324 if (pmc_hashsize <= 0) { 4325 (void) printf("hwpmc: tunable \"hashsize\"=%d must be " 4326 "greater than zero.\n", pmc_hashsize); 4327 pmc_hashsize = PMC_HASH_SIZE; 4328 } 4329 4330 if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { 4331 (void) printf("hwpmc: tunable \"nsamples\"=%d out of " 4332 "range.\n", pmc_nsamples); 4333 pmc_nsamples = PMC_NSAMPLES; 4334 } 4335 4336 if (pmc_callchaindepth <= 0 || 4337 pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { 4338 (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of " 4339 "range.\n", pmc_callchaindepth); 4340 pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; 4341 } 4342 4343 md = pmc_md_initialize(); 4344 4345 if (md == NULL || md->pmd_init == NULL) 4346 return ENOSYS; 4347 4348 maxcpu = pmc_cpu_max(); 4349 4350 /* allocate space for the per-cpu array */ 4351 pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *), 4352 M_PMC, M_WAITOK|M_ZERO); 4353 4354 /* per-cpu 'saved values' for managing process-mode PMCs */ 4355 pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc, 4356 M_PMC, M_WAITOK); 4357 4358 /* Perform CPU-dependent initialization. 
static int
pmc_initialize(void)
{
	int cpu, error, n;
	unsigned int maxcpu;
	struct pmc_binding pb;
	struct pmc_sample *ps;
	struct pmc_samplebuffer *sb;

	md = NULL;
	error = 0;

#ifdef DEBUG
	/* parse debug flags first */
	if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags",
	    pmc_debugstr, sizeof(pmc_debugstr)))
		pmc_debugflags_parse(pmc_debugstr,
		    pmc_debugstr+strlen(pmc_debugstr));
#endif

	PMCDBG(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION);

	/* check kernel version */
	if (pmc_kernel_version != PMC_VERSION) {
		if (pmc_kernel_version == 0)
			printf("hwpmc: this kernel has not been compiled with "
			    "'options HWPMC_HOOKS'.\n");
		else
			printf("hwpmc: kernel version (0x%x) does not match "
			    "module version (0x%x).\n", pmc_kernel_version,
			    PMC_VERSION);
		return EPROGMISMATCH;
	}

	/*
	 * check sysctl parameters
	 */

	if (pmc_hashsize <= 0) {
		(void) printf("hwpmc: tunable \"hashsize\"=%d must be "
		    "greater than zero.\n", pmc_hashsize);
		pmc_hashsize = PMC_HASH_SIZE;
	}

	if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
		(void) printf("hwpmc: tunable \"nsamples\"=%d out of "
		    "range.\n", pmc_nsamples);
		pmc_nsamples = PMC_NSAMPLES;
	}

	if (pmc_callchaindepth <= 0 ||
	    pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
		(void) printf("hwpmc: tunable \"callchaindepth\"=%d out of "
		    "range.\n", pmc_callchaindepth);
		pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
	}

	md = pmc_md_initialize();

	if (md == NULL || md->pmd_init == NULL)
		return ENOSYS;

	maxcpu = pmc_cpu_max();

	/* allocate space for the per-cpu array */
	pmc_pcpu = malloc(maxcpu * sizeof(struct pmc_cpu *),
	    M_PMC, M_WAITOK|M_ZERO);

	/* per-cpu 'saved values' for managing process-mode PMCs */
	pmc_pcpu_saved = malloc(sizeof(pmc_value_t) * maxcpu * md->pmd_npmc,
	    M_PMC, M_WAITOK);

	/* Perform CPU-dependent initialization. */
	pmc_save_cpu_binding(&pb);
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (!pmc_cpu_is_active(cpu))
			continue;
		pmc_select_cpu(cpu);
		if ((error = md->pmd_init(cpu)) != 0)
			break;
	}
	pmc_restore_cpu_binding(&pb);

	if (error != 0)
		return error;

	/*
	 * Allocate space for the per-cpu sample arrays.  Each buffer
	 * is a ring: 'ps_read' and 'ps_write' chase each other between
	 * 'ps_samples' and 'ps_fence' (see the illustrative sketch
	 * following this function).
	 */
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (!pmc_cpu_is_active(cpu))
			continue;
		sb = malloc(sizeof(struct pmc_samplebuffer) +
		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
		    M_WAITOK|M_ZERO);

		sb->ps_read = sb->ps_write = sb->ps_samples;
		sb->ps_fence = sb->ps_samples + pmc_nsamples;
		KASSERT(pmc_pcpu[cpu] != NULL,
		    ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));

		sb->ps_callchains = malloc(pmc_callchaindepth *
		    pmc_nsamples * sizeof(uintptr_t),
		    M_PMC, M_WAITOK|M_ZERO);

		for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
			ps->ps_pc = sb->ps_callchains +
			    (n * pmc_callchaindepth);

		pmc_pcpu[cpu]->pc_sb = sb;
	}

	/* allocate space for the row disposition array */
	pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc,
	    M_PMC, M_WAITOK|M_ZERO);

	KASSERT(pmc_pmcdisp != NULL,
	    ("[pmc,%d] pmcdisp allocation returned NULL", __LINE__));

	/* mark all PMCs as available */
	for (n = 0; n < (int) md->pmd_npmc; n++)
		PMC_MARK_ROW_FREE(n);

	/* allocate the owner and process hash tables */
	pmc_ownerhash = hashinit(pmc_hashsize, M_PMC,
	    &pmc_ownerhashmask);

	pmc_processhash = hashinit(pmc_hashsize, M_PMC,
	    &pmc_processhashmask);
	mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc-leaf",
	    MTX_SPIN);

	LIST_INIT(&pmc_ss_owners);
	pmc_ss_count = 0;

	/* allocate a pool of spin mutexes */
	pmc_mtxpool = mtx_pool_create("pmc-leaf", pmc_mtxpool_size,
	    MTX_SPIN);

	PMCDBG(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx "
	    "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask,
	    pmc_processhash, pmc_processhashmask);

	/* register process {exit,fork} handlers */
	pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit,
	    pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY);
	pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork,
	    pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY);

	/* initialize logging */
	pmclog_initialize();

	/* set hook functions */
	pmc_intr = md->pmd_intr;
	pmc_hook = pmc_hook_handler;

	if (error == 0) {
		printf(PMC_MODULE_NAME ":");
		for (n = 0; n < (int) md->pmd_nclass; n++) {
			printf(" %s/%d/0x%b",
			    pmc_name_of_pmcclass[md->pmd_classes[n].pm_class],
			    md->pmd_nclasspmcs[n],
			    md->pmd_classes[n].pm_caps,
			    "\20"
			    "\1INT\2USR\3SYS\4EDG\5THR"
			    "\6REA\7WRI\10INV\11QUA\12PRC"
			    "\13TAG\14CSC");
		}
		printf("\n");
	}

	return error;
}

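/*
 * Illustrative sketch (compiled out): one way a producer could claim
 * the next free slot in a per-cpu sample ring with the layout built
 * in pmc_initialize() above.  This is not the driver's actual
 * sampling path, and the helper name is hypothetical.
 */
#if 0
static struct pmc_sample *
pmc_sample_claim(struct pmc_samplebuffer *psb)
{
	struct pmc_sample *ps, *ps_next;

	ps = psb->ps_write;		/* candidate slot */
	ps_next = ps + 1;
	if (ps_next == psb->ps_fence)	/* wrap around to the start */
		ps_next = psb->ps_samples;
	if (ps_next == psb->ps_read)	/* ring full: drop the sample */
		return (NULL);
	psb->ps_write = ps_next;	/* publish the new write pointer */
	return (ps);
}
#endif
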
/* prepare to be unloaded */
static void
pmc_cleanup(void)
{
	int cpu;
	unsigned int maxcpu;
	struct pmc_ownerhash *ph;
	struct pmc_owner *po, *tmp;
	struct pmc_binding pb;
#ifdef DEBUG
	struct pmc_processhash *prh;
#endif

	PMCDBG(MOD,INI,0, "%s", "cleanup");

	/* switch off sampling */
	atomic_store_rel_int(&pmc_cpumask, 0);
	pmc_intr = NULL;

	sx_xlock(&pmc_sx);
	if (pmc_hook == NULL) {	/* being unloaded already */
		sx_xunlock(&pmc_sx);
		return;
	}

	pmc_hook = NULL; /* prevent new threads from entering module */

	/* deregister event handlers */
	EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag);
	EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag);

	/* send SIGBUS to all owner processes, free up allocations */
	if (pmc_ownerhash)
		for (ph = pmc_ownerhash;
		     ph <= &pmc_ownerhash[pmc_ownerhashmask];
		     ph++) {
			LIST_FOREACH_SAFE(po, ph, po_next, tmp) {
				pmc_remove_owner(po);

				/* send SIGBUS to owner processes */
				PMCDBG(MOD,INI,2, "cleanup signal proc=%p "
				    "(%d, %s)", po->po_owner,
				    po->po_owner->p_pid,
				    po->po_owner->p_comm);

				PROC_LOCK(po->po_owner);
				psignal(po->po_owner, SIGBUS);
				PROC_UNLOCK(po->po_owner);

				pmc_destroy_owner_descriptor(po);
			}
		}

	/* reclaim allocated data structures */
	if (pmc_mtxpool)
		mtx_pool_destroy(&pmc_mtxpool);

	mtx_destroy(&pmc_processhash_mtx);
	if (pmc_processhash) {
#ifdef DEBUG
		struct pmc_process *pp;

		PMCDBG(MOD,INI,3, "%s", "destroy process hash");
		for (prh = pmc_processhash;
		     prh <= &pmc_processhash[pmc_processhashmask];
		     prh++)
			LIST_FOREACH(pp, prh, pp_next)
				PMCDBG(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid);
#endif

		hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask);
		pmc_processhash = NULL;
	}

	if (pmc_ownerhash) {
		PMCDBG(MOD,INI,3, "%s", "destroy owner hash");
		hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask);
		pmc_ownerhash = NULL;
	}

	KASSERT(LIST_EMPTY(&pmc_ss_owners),
	    ("[pmc,%d] Global SS owner list not empty", __LINE__));
	KASSERT(pmc_ss_count == 0,
	    ("[pmc,%d] Global SS count not zero", __LINE__));

	/* Free the per-cpu sample buffers. */
	maxcpu = pmc_cpu_max();
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (!pmc_cpu_is_active(cpu))
			continue;
		KASSERT(pmc_pcpu[cpu]->pc_sb != NULL,
		    ("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__,
		    cpu));
		free(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC);
		free(pmc_pcpu[cpu]->pc_sb, M_PMC);
		pmc_pcpu[cpu]->pc_sb = NULL;
	}

	/* do processor dependent cleanup */
	PMCDBG(MOD,INI,3, "%s", "md cleanup");
	if (md) {
		pmc_save_cpu_binding(&pb);
		for (cpu = 0; cpu < maxcpu; cpu++) {
			PMCDBG(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p",
			    cpu, pmc_pcpu[cpu]);
			if (!pmc_cpu_is_active(cpu) || pmc_pcpu[cpu] == NULL)
				continue;
			pmc_select_cpu(cpu);
			if (md->pmd_cleanup)
				md->pmd_cleanup(cpu);
		}
		free(md, M_PMC);
		md = NULL;
		pmc_restore_cpu_binding(&pb);
	}

	/* deallocate per-cpu structures */
	free(pmc_pcpu, M_PMC);
	pmc_pcpu = NULL;

	free(pmc_pcpu_saved, M_PMC);
	pmc_pcpu_saved = NULL;

	if (pmc_pmcdisp) {
		free(pmc_pmcdisp, M_PMC);
		pmc_pmcdisp = NULL;
	}

	pmclog_shutdown();

	sx_xunlock(&pmc_sx);	/* we are done */
}

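/*
 * Note on teardown ordering: sampling interrupts are switched off and
 * pmc_hook is cleared first, so no new threads can enter the module
 * while owner and process state, the per-cpu buffers, and the MD layer
 * are being torn down; the log is shut down last.
 */
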
/*
 * The function called at load/unload.
 */

static int
load(struct module *module __unused, int cmd, void *arg __unused)
{
	int error;

	error = 0;

	switch (cmd) {
	case MOD_LOAD:
		/* initialize the subsystem */
		error = pmc_initialize();
		if (error != 0)
			break;
		PMCDBG(MOD,INI,1, "syscall=%d maxcpu=%d",
		    pmc_syscall_num, pmc_cpu_max());
		break;

	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		pmc_cleanup();
		PMCDBG(MOD,INI,1, "%s", "unloaded");
		break;

	default:
		error = EINVAL;	/* XXX should panic(9) */
		break;
	}

	return error;
}

/* memory pool */
MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");

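/*
 * Illustrative sketch (compiled out): a module event handler such as
 * load() above is normally wired into the kernel through a
 * 'moduledata_t' and DECLARE_MODULE(9).  The names below are
 * hypothetical; this module's real declaration must also arrange for
 * its system call number, which is not shown here.
 */
#if 0
static moduledata_t pmc_example_mod = {
	"pmc_example",		/* module name (hypothetical) */
	load,			/* event handler */
	NULL			/* extra data */
};

DECLARE_MODULE(pmc_example, pmc_example_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_VERSION(pmc_example, 1);
#endif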