/*-
 * Copyright (c) 2003-2005 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>

#include <machine/md_var.h>

/*
 * Types
 */

enum pmc_flags {
        PMC_FLAG_NONE     = 0x00, /* do nothing */
        PMC_FLAG_REMOVE   = 0x01, /* atomically remove entry from hash */
        PMC_FLAG_ALLOCATE = 0x02, /* add entry to hash if not found */
};
/*
 * The offset in sysent where the syscall is allocated.
 */

static int pmc_syscall_num = NO_SYSCALL;
struct pmc_cpu **pmc_pcpu;       /* per-cpu state */
pmc_value_t *pmc_pcpu_saved;     /* saved PMC values: CSW handling */

#define PMC_PCPU_SAVED(C,R)     pmc_pcpu_saved[(R) + md->pmd_npmc*(C)]

struct mtx_pool *pmc_mtxpool;
static int *pmc_pmcdisp;         /* PMC row dispositions */

#define PMC_ROW_DISP_IS_FREE(R)         (pmc_pmcdisp[(R)] == 0)
#define PMC_ROW_DISP_IS_THREAD(R)       (pmc_pmcdisp[(R)] > 0)
#define PMC_ROW_DISP_IS_STANDALONE(R)   (pmc_pmcdisp[(R)] < 0)

#define PMC_MARK_ROW_FREE(R) do {                                         \
        pmc_pmcdisp[(R)] = 0;                                             \
} while (0)

#define PMC_MARK_ROW_STANDALONE(R) do {                                   \
        KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
            __LINE__));                                                   \
        atomic_add_int(&pmc_pmcdisp[(R)], -1);                            \
        KASSERT(pmc_pmcdisp[(R)] >= (-mp_ncpus), ("[pmc,%d] row "         \
            "disposition error", __LINE__));                              \
} while (0)

#define PMC_UNMARK_ROW_STANDALONE(R) do {                                 \
        atomic_add_int(&pmc_pmcdisp[(R)], 1);                             \
        KASSERT(pmc_pmcdisp[(R)] <= 0, ("[pmc,%d] row disposition error", \
            __LINE__));                                                   \
} while (0)

#define PMC_MARK_ROW_THREAD(R) do {                                       \
        KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
            __LINE__));                                                   \
        atomic_add_int(&pmc_pmcdisp[(R)], 1);                             \
} while (0)

#define PMC_UNMARK_ROW_THREAD(R) do {                                     \
        atomic_add_int(&pmc_pmcdisp[(R)], -1);                            \
        KASSERT(pmc_pmcdisp[(R)] >= 0, ("[pmc,%d] row disposition error", \
            __LINE__));                                                   \
} while (0)
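/*
 * Illustrative note (not part of the original driver source): the row
 * disposition array 'pmc_pmcdisp[]' is a per-row reference count with a
 * sign convention.  A value of 0 means the row is FREE; a positive value
 * counts process-mode (THREAD) users; a negative value counts system-mode
 * (STANDALONE) users, at most one per CPU.  For example, on a 2-CPU
 * machine a row claimed for system-wide use on both CPUs reads -2, and
 * the count must return to 0 before a process-mode PMC may claim that
 * row.
 */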
/* various event handlers */
static eventhandler_tag pmc_exit_tag, pmc_fork_tag;

/* Module statistics */
struct pmc_op_getdriverstats pmc_stats;

/* Machine/processor dependent operations */
struct pmc_mdep *md;

/*
 * Hash tables mapping owner processes and target threads to PMCs.
 */

struct mtx pmc_processhash_mtx;         /* spin mutex */
static u_long pmc_processhashmask;
static LIST_HEAD(pmc_processhash, pmc_process) *pmc_processhash;

/*
 * Hash table of PMC owner descriptors.  This table is protected by
 * the shared PMC "sx" lock.
 */

static u_long pmc_ownerhashmask;
static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash;

/*
 * Prototypes
 */

#if     DEBUG
static int      pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS);
static int      pmc_debugflags_parse(char *newstr, char *fence);
#endif

static int      load(struct module *module, int cmd, void *arg);
static int      pmc_syscall_handler(struct thread *td, void *syscall_args);
static int      pmc_configure_log(struct pmc_owner *po, int logfd);
static void     pmc_log_process_exit(struct pmc *pm, struct pmc_process *pp);
static struct pmc *pmc_allocate_pmc_descriptor(void);
static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
    pmc_id_t pmc);
static void     pmc_release_pmc_descriptor(struct pmc *pmc);
static int      pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
    int cpu);
static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
    uint32_t mode);
static void     pmc_remove_process_descriptor(struct pmc_process *pp);
static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
static int      pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
static void     pmc_force_context_switch(void);
static void     pmc_remove_owner(struct pmc_owner *po);
static void     pmc_maybe_remove_owner(struct pmc_owner *po);
static void     pmc_unlink_target_process(struct pmc *pmc,
    struct pmc_process *pp);
static void     pmc_link_target_process(struct pmc *pm,
    struct pmc_process *pp);
static void     pmc_unlink_owner(struct pmc *pmc);
static void     pmc_cleanup(void);
static void     pmc_save_cpu_binding(struct pmc_binding *pb);
static void     pmc_restore_cpu_binding(struct pmc_binding *pb);
static void     pmc_select_cpu(int cpu);
static void     pmc_process_exit(void *arg, struct proc *p);
static void     pmc_process_fork(void *arg, struct proc *p1,
    struct proc *p2, int n);
static int      pmc_attach_one_process(struct proc *p, struct pmc *pm);
static int      pmc_attach_process(struct proc *p, struct pmc *pm);
static int      pmc_detach_one_process(struct proc *p, struct pmc *pm,
    int flags);
static int      pmc_detach_process(struct proc *p, struct pmc *pm);
static int      pmc_start(struct pmc *pm);
static int      pmc_stop(struct pmc *pm);
static int      pmc_can_attach(struct pmc *pm, struct proc *p);

/*
 * Kernel tunables and sysctl(8) interface.
 */

#define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."

SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");

#if     DEBUG
unsigned int pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char    pmc_debugstr[PMC_DEBUG_STRSIZE];
TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr,
    sizeof(pmc_debugstr));
SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
    CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_TUN,
    0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
#endif

/*
 * kern.hwpmc.hashsize -- determines the number of rows in the hash
 * table used to look up threads.
 */

static int pmc_hashsize = PMC_HASH_SIZE;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "hashsize", &pmc_hashsize);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_hashsize, 0, "rows in hash tables");
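/*
 * Usage sketch (added for illustration; assumes PMC_MODULE_NAME expands
 * to "hwpmc", matching the kern.hwpmc sysctl node above): since these
 * MIB entries are marked CTLFLAG_TUN they can be set as loader(8)
 * tunables before the module initializes, e.g. in /boot/loader.conf.
 * The values below are hypothetical:
 *
 *      kern.hwpmc.hashsize=4096
 *      kern.hwpmc.pcpubuffersize=32
 *      kern.hwpmc.mtxpoolsize=64
 */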
/*
 * kern.hwpmc.pcpubuffersize -- the size of each per-cpu
 * area for collecting PC samples.
 */

static int pmc_pcpu_buffer_size = PMC_PCPU_BUFFER_SIZE;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "pcpubuffersize", &pmc_pcpu_buffer_size);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, pcpubuffersize, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_pcpu_buffer_size, 0, "size of per-cpu buffer in 4K pages");

/*
 * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
 */

static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "mtxpoolsize", &pmc_mtxpool_size);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_TUN|CTLFLAG_RD,
    &pmc_mtxpool_size, 0, "size of spin mutex pool");

/*
 * security.bsd.unprivileged_syspmcs -- allow non-root processes to
 * allocate system-wide PMCs.
 *
 * Allowing unprivileged processes to allocate system PMCs is convenient
 * if system-wide measurements need to be taken concurrently with other
 * per-process measurements.  This feature is turned off by default.
 */

SYSCTL_DECL(_security_bsd);

static int pmc_unprivileged_syspmcs = 0;
TUNABLE_INT("security.bsd.unprivileged_syspmcs", &pmc_unprivileged_syspmcs);
SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RW,
    &pmc_unprivileged_syspmcs, 0,
    "allow unprivileged process to allocate system PMCs");

#if     PMC_HASH_USE_CRC32

#define PMC_HASH_PTR(P,M)       (crc32(&(P), sizeof((P))) & (M))

#else   /* integer multiplication */

#if     LONG_BIT == 64
#define _PMC_HM         11400714819323198486u
#elif   LONG_BIT == 32
#define _PMC_HM         2654435769u
#else
#error  Must know the size of 'long' to compile
#endif

/*
 * Hash function.  Discard the lower 2 bits of the pointer since
 * these are always zero for our uses.  The hash multiplier is
 * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
 */

#define PMC_HASH_PTR(P,M)       ((((unsigned long) (P) >> 2) * _PMC_HM) & (M))

#endif
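/*
 * Worked example (added for illustration): this is Fibonacci hashing.
 * With LONG_BIT == 32, _PMC_HM == round(2^32 * (sqrt(5)-1)/2) ==
 * 2654435769.  For a pointer value P, the computation is
 *
 *      h = (((unsigned long) P >> 2) * 2654435769UL) & M
 *
 * where the multiplication is modulo 2^32 and M is a power-of-2 mask
 * (e.g. pmc_processhashmask).  The golden-ratio multiplier scatters
 * consecutively allocated (and therefore numerically similar) pointer
 * values across the table; the '>> 2' drops the two low bits, which are
 * always zero for the word-aligned kernel pointers hashed here.
 */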
/*
 * Syscall structures
 */

/* The `sysent' for the new syscall */
static struct sysent pmc_sysent = {
        2,                      /* sy_narg */
        pmc_syscall_handler     /* sy_call */
};

static struct syscall_module_data pmc_syscall_mod = {
        load,
        NULL,
        &pmc_syscall_num,
        &pmc_sysent,
        { 0, NULL }
};

static moduledata_t pmc_mod = {
        PMC_MODULE_NAME,
        syscall_module_handler,
        &pmc_syscall_mod
};

DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_VERSION(pmc, PMC_VERSION);

#if     DEBUG
static int
pmc_debugflags_parse(char *newstr, char *fence)
{
        char c, *p, *q;
        unsigned int tmpflags;
        int level;
        char tmpbuf[4];         /* 3 character keyword + '\0' */

        tmpflags = 0;
        level = 0xF;            /* max verbosity */

        p = newstr;

        for (; p < fence && (c = *p);) {

                /* skip separators */
                if (c == ' ' || c == '\t' || c == ',') {
                        p++; continue;
                }

                (void) strlcpy(tmpbuf, p, sizeof(tmpbuf));

#define CMP_SET_FLAG_MAJ(S,F)                           \
        else if (strncmp(tmpbuf, S, 3) == 0)            \
                tmpflags |= __PMCDFMAJ(F)

#define CMP_SET_FLAG_MIN(S,F)                           \
        else if (strncmp(tmpbuf, S, 3) == 0)            \
                tmpflags |= __PMCDFMIN(F)

                if (fence - p > 6 && strncmp(p, "level=", 6) == 0) {
                        p += 6; /* skip over keyword */
                        level = strtoul(p, &q, 16);
                }
                CMP_SET_FLAG_MAJ("mod", MOD);
                CMP_SET_FLAG_MAJ("pmc", PMC);
                CMP_SET_FLAG_MAJ("ctx", CTX);
                CMP_SET_FLAG_MAJ("own", OWN);
                CMP_SET_FLAG_MAJ("prc", PRC);
                CMP_SET_FLAG_MAJ("mdp", MDP);
                CMP_SET_FLAG_MAJ("cpu", CPU);

                CMP_SET_FLAG_MIN("all", ALL);
                CMP_SET_FLAG_MIN("rel", REL);
                CMP_SET_FLAG_MIN("ops", OPS);
                CMP_SET_FLAG_MIN("ini", INI);
                CMP_SET_FLAG_MIN("fnd", FND);
                CMP_SET_FLAG_MIN("pmh", PMH);
                CMP_SET_FLAG_MIN("pms", PMS);
                CMP_SET_FLAG_MIN("orm", ORM);
                CMP_SET_FLAG_MIN("omr", OMR);
                CMP_SET_FLAG_MIN("tlk", TLK);
                CMP_SET_FLAG_MIN("tul", TUL);
                CMP_SET_FLAG_MIN("ext", EXT);
                CMP_SET_FLAG_MIN("exc", EXC);
                CMP_SET_FLAG_MIN("frk", FRK);
                CMP_SET_FLAG_MIN("att", ATT);
                CMP_SET_FLAG_MIN("swi", SWI);
                CMP_SET_FLAG_MIN("swo", SWO);
                CMP_SET_FLAG_MIN("reg", REG);
                CMP_SET_FLAG_MIN("alr", ALR);
                CMP_SET_FLAG_MIN("rea", REA);
                CMP_SET_FLAG_MIN("wri", WRI);
                CMP_SET_FLAG_MIN("cfg", CFG);
                CMP_SET_FLAG_MIN("sta", STA);
                CMP_SET_FLAG_MIN("sto", STO);
                CMP_SET_FLAG_MIN("int", INT);
                CMP_SET_FLAG_MIN("bnd", BND);
                CMP_SET_FLAG_MIN("sel", SEL);
                else    /* unrecognized keyword */
                        return EINVAL;

                p += 4; /* skip keyword and separator */
        }

        pmc_debugflags = (tmpflags|level);

        return 0;
}

static int
pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
        char *fence, *newstr;
        int error;
        unsigned int n;

        (void) arg1; (void) arg2; /* unused parameters */

        n = sizeof(pmc_debugstr);
        MALLOC(newstr, char *, n, M_PMC, M_ZERO|M_WAITOK);
        (void) strlcpy(newstr, pmc_debugstr, sizeof(pmc_debugstr));

        error = sysctl_handle_string(oidp, newstr, n, req);

        /* if there is a new string, parse and copy it */
        if (error == 0 && req->newptr != NULL) {
                fence = newstr + (n < req->newlen ? n : req->newlen);
                if ((error = pmc_debugflags_parse(newstr, fence)) == 0)
                        (void) strlcpy(pmc_debugstr, newstr,
                            sizeof(pmc_debugstr));
        }

        FREE(newstr, M_PMC);

        return error;
}
#endif
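/*
 * Usage sketch (added for illustration; keyword names are taken from
 * the parser above): debug output is selected by writing a space- or
 * comma-separated list of three-letter keywords, plus an optional hex
 * verbosity level, to the sysctl, e.g.:
 *
 *      # sysctl kern.hwpmc.debugflags="level=f mod,pmc,ctx"
 *
 * which would enable the MOD, PMC and CTX major flags at maximum
 * verbosity.
 */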
/*
 * Concurrency Control
 *
 * The driver manages the following data structures:
 *
 *   - target process descriptors, one per target process
 *   - owner process descriptors (and attached lists), one per owner process
 *   - lookup hash tables for owner and target processes
 *   - PMC descriptors (and attached lists)
 *   - per-cpu hardware state
 *   - the 'hook' variable through which the kernel calls into
 *     this module
 *   - the machine hardware state (managed by the MD layer)
 *
 * These data structures are accessed from:
 *
 *   - thread context-switch code
 *   - interrupt handlers (possibly on multiple cpus)
 *   - kernel threads on multiple cpus running on behalf of user
 *     processes doing system calls
 *   - this driver's private kernel threads
 *
 * = Locks and Locking strategy =
 *
 * The driver uses four locking strategies for its operation:
 *
 * - There is a 'global' SX lock "pmc_sx" that is used to protect
 *   the driver's 'meta-data'.
 *
 *   Calls into the module (via syscall() or by the kernel) start with
 *   this lock being held in exclusive mode.  Depending on the requested
 *   operation, the lock may be downgraded to 'shared' mode to allow
 *   more concurrent readers into the module.
 *
 *   This SX lock is held in exclusive mode for any operations that
 *   modify the linkages between the driver's internal data structures.
 *
 *   The 'pmc_hook' function pointer is also protected by this lock.
 *   It is only examined with the sx lock held in exclusive mode.  The
 *   kernel module is allowed to be unloaded only with the sx lock held
 *   in exclusive mode.  In normal syscall handling, after acquiring the
 *   pmc_sx lock we first check that 'pmc_hook' is non-null before
 *   proceeding.  This prevents races between the thread unloading the
 *   module and other threads seeking to use the module.
 *
 * - Lookups of target process structures and owner process structures
 *   cannot use the global "pmc_sx" SX lock because these lookups need
 *   to happen during context switches and in other critical sections
 *   where sleeping is not allowed.  We protect these lookup tables
 *   with their own private spin-mutexes, "pmc_processhash_mtx" and
 *   "pmc_ownerhash_mtx".  These are 'leaf' mutexes, in that no other
 *   lock is acquired with these locks held.
 *
 * - Interrupt handlers work in a lock free manner.  At interrupt
 *   time, handlers look at the PMC pointer (phw->phw_pmc) configured
 *   when the PMC was started.  If this pointer is NULL, the interrupt
 *   is ignored after updating driver statistics.  We ensure that this
 *   pointer is set (using an atomic operation if necessary) before the
 *   PMC hardware is started.  Conversely, this pointer is unset atomically
 *   only after the PMC hardware is stopped.
 *
 *   We ensure that everything needed for the operation of an
 *   interrupt handler is available without it needing to acquire any
 *   locks.  We also ensure that a PMC's software state is destroyed only
 *   after the PMC is taken off hardware (on all CPUs).
 *
 * - Context-switch handling with process-private PMCs needs more
 *   care.
 *
 *   A given process may be the target of multiple PMCs.
 *   For example, PMCATTACH and PMCDETACH may be requested by a process
 *   on one CPU while the target process is running on another.  A PMC
 *   could also be getting released because its owner is exiting.  We
 *   tackle these situations in the following manner:
 *
 *   - each target process structure 'pmc_process' has an array
 *     of 'struct pmc *' pointers, one for each hardware PMC.
 *
 *   - At context switch IN time, each "target" PMC in RUNNING state
 *     gets started on hardware and a pointer to each PMC is copied into
 *     the per-cpu phw array.  The 'runcount' for the PMC is
 *     incremented.
 *
 *   - At context switch OUT time, all process-virtual PMCs are stopped
 *     on hardware.  The saved value is added to the PMC's value field
 *     only if the PMC is in a non-deleted state (the PMC's state could
 *     have changed during the current time slice).
 *
 *     Note that in between a switch IN on a processor and a switch
 *     OUT, the PMC could have been released on another CPU.  Therefore
 *     context switch OUT always looks at the hardware state to turn
 *     OFF PMCs and will update a PMC's saved value only if reachable
 *     from the target process record.
 *
 *   - OP PMCRELEASE could be called on a PMC at any time (the PMC could
 *     be attached to many processes at the time of the call and could
 *     be active on multiple CPUs).
 *
 *     We prevent further scheduling of the PMC by marking it as in
 *     state 'DELETED'.  If the runcount of the PMC is non-zero then
 *     this PMC is currently running on a CPU somewhere.  The thread
 *     doing the PMCRELEASE operation waits by repeatedly doing a
 *     tsleep() till the runcount comes to zero.
 *
 */
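/*
 * Illustrative sketch (not part of the original file): the syscall
 * entry sequence implied by the locking rules above.  Only the
 * lock/hook discipline is shown; the actual dispatch logic in
 * pmc_syscall_handler() is elided.
 */
#if 0
        sx_xlock(&pmc_sx);              /* start fully exclusive */
        if (pmc_hook == NULL) {         /* module is being unloaded */
                sx_xunlock(&pmc_sx);
                return (ENOSYS);
        }
        /* read-mostly operations may downgrade for more concurrency */
        sx_downgrade(&pmc_sx);
        /* ... perform the requested operation ... */
        sx_unlock(&pmc_sx);
#endif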
/*
 * save the cpu binding of the current kthread
 */

static void
pmc_save_cpu_binding(struct pmc_binding *pb)
{
        PMCDBG(CPU,BND,2, "%s", "save-cpu");
        mtx_lock_spin(&sched_lock);
        pb->pb_bound = sched_is_bound(curthread);
        pb->pb_cpu   = curthread->td_oncpu;
        mtx_unlock_spin(&sched_lock);
        PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu);
}

/*
 * restore the cpu binding of the current thread
 */

static void
pmc_restore_cpu_binding(struct pmc_binding *pb)
{
        PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d",
            curthread->td_oncpu, pb->pb_cpu);
        mtx_lock_spin(&sched_lock);
        if (pb->pb_bound)
                sched_bind(curthread, pb->pb_cpu);
        else
                sched_unbind(curthread);
        mtx_unlock_spin(&sched_lock);
        PMCDBG(CPU,BND,2, "%s", "restore-cpu done");
}

/*
 * move execution to the specified cpu and bind it there.
 */

static void
pmc_select_cpu(int cpu)
{
        KASSERT(cpu >= 0 && cpu < mp_ncpus,
            ("[pmc,%d] bad cpu number %d", __LINE__, cpu));

        /* never move to a disabled CPU */
        KASSERT(pmc_cpu_is_disabled(cpu) == 0, ("[pmc,%d] selecting "
            "disabled CPU %d", __LINE__, cpu));

        PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu);
        mtx_lock_spin(&sched_lock);
        sched_bind(curthread, cpu);
        mtx_unlock_spin(&sched_lock);

        KASSERT(curthread->td_oncpu == cpu,
            ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__,
                cpu, curthread->td_oncpu));

        PMCDBG(CPU,SEL,2, "select-cpu cpu=%d ok", cpu);
}

/*
 * Force a context switch.
 *
 * We do this by tsleep'ing for 1 tick -- invoking mi_switch() is not
 * guaranteed to force a context switch.
 */

static void
pmc_force_context_switch(void)
{
        u_char curpri;

        mtx_lock_spin(&sched_lock);
        curpri = curthread->td_priority;
        mtx_unlock_spin(&sched_lock);

        (void) tsleep((void *) pmc_force_context_switch, curpri,
            "pmcctx", 1);

}
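/*
 * Usage sketch (added for illustration): per-CPU PMC hardware may only
 * be touched from the CPU that owns it, so callers such as
 * pmc_release_pmc_descriptor() and pmc_start() bracket MD operations
 * with the binding helpers above:
 */
#if 0
        struct pmc_binding pb;

        pmc_save_cpu_binding(&pb);      /* remember the current binding */
        pmc_select_cpu(cpu);            /* migrate to the target CPU */
        /* ... program or stop the PMC hardware on 'cpu' ... */
        pmc_restore_cpu_binding(&pb);   /* undo the temporary binding */
#endif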
/*
 * Update the per-pmc histogram
 */

void
pmc_update_histogram(struct pmc_hw *phw, uintptr_t pc)
{
        (void) phw;
        (void) pc;
}

/*
 * Send a signal to a process.  This is meant to be invoked from an
 * interrupt handler.
 */

void
pmc_send_signal(struct pmc *pmc)
{
        (void) pmc;     /* shut up gcc */

#if     0
        struct proc   *proc;
        struct thread *td;

        KASSERT(pmc->pm_owner != NULL,
            ("[pmc,%d] No owner for PMC", __LINE__));

        KASSERT((pmc->pm_owner->po_flags & PMC_FLAG_IS_OWNER) &&
            (pmc->pm_owner->po_flags & PMC_FLAG_HAS_TS_PMC),
            ("[pmc,%d] interrupting PMC owner has wrong flags 0x%x",
                __LINE__, pmc->pm_owner->po_flags));

        proc = pmc->pm_owner->po_owner;

        KASSERT(curthread->td_proc == proc,
            ("[pmc,%d] interrupting the wrong thread (owner %p, "
                "cur %p)", __LINE__, (void *) proc, curthread->td_proc));

        mtx_lock_spin(&sched_lock);
        td = TAILQ_FIRST(&proc->p_threads);
        mtx_unlock_spin(&sched_lock);
        /* XXX RACE HERE: can 'td' disappear now? */
        trapsignal(td, SIGPROF, 0);
        /* XXX rework this to use the regular 'psignal' interface from a
           helper thread */
#endif

}

/*
 * remove a process owning PMCs
 */

void
pmc_remove_owner(struct pmc_owner *po)
{
        struct pmc_list *pl, *tmp;

        sx_assert(&pmc_sx, SX_XLOCKED);

        PMCDBG(OWN,ORM,1, "remove-owner po=%p", po);

        /* Remove descriptor from the owner hash table */
        LIST_REMOVE(po, po_next);

        /* pass 1: release all owned PMC descriptors */
        LIST_FOREACH_SAFE(pl, &po->po_pmcs, pl_next, tmp) {

                PMCDBG(OWN,ORM,2, "pl=%p pmc=%p", pl, pl->pl_pmc);

                /* remove the associated PMC descriptor, if present */
                if (pl->pl_pmc)
                        pmc_release_pmc_descriptor(pl->pl_pmc);

                /* remove the linked list entry */
                LIST_REMOVE(pl, pl_next);
                FREE(pl, M_PMC);
        }

        /* pass 2: delete the pmc_list chain */
        LIST_FOREACH_SAFE(pl, &po->po_pmcs, pl_next, tmp) {
                KASSERT(pl->pl_pmc == NULL,
                    ("[pmc,%d] non-null pmc pointer", __LINE__));
                LIST_REMOVE(pl, pl_next);
                FREE(pl, M_PMC);
        }

        KASSERT(LIST_EMPTY(&po->po_pmcs),
            ("[pmc,%d] PMC list not empty", __LINE__));

        /*
         * If this process owns a log file used for system wide logging,
         * remove the log file.
         *
         * XXX rework needed.
         */

        if (po->po_flags & PMC_PO_OWNS_LOGFILE)
                pmc_configure_log(po, -1);

}

/*
 * remove an owner process record if all conditions are met.
 */

static void
pmc_maybe_remove_owner(struct pmc_owner *po)
{

        PMCDBG(OWN,OMR,1, "maybe-remove-owner po=%p", po);

        /*
         * Remove owner record if
         * - this process does not own any PMCs
         * - this process has not allocated a system-wide sampling buffer
         */

        if (LIST_EMPTY(&po->po_pmcs) &&
            ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
                pmc_remove_owner(po);
                FREE(po, M_PMC);
        }
}

/*
 * Add an association between a target process and a PMC.
 */

static void
pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
{
        int ri;
        struct pmc_target *pt;

        sx_assert(&pmc_sx, SX_XLOCKED);

        KASSERT(pm != NULL && pp != NULL,
            ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));

        KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < ((int) md->pmd_npmc - 1),
            ("[pmc,%d] Illegal reference count %d for process record %p",
                __LINE__, pp->pp_refcnt, (void *) pp));

        ri = PMC_TO_ROWINDEX(pm);

        PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p",
            pm, ri, pp);

#if     DEBUG
        LIST_FOREACH(pt, &pm->pm_targets, pt_next)
            if (pt->pt_process == pp)
                    KASSERT(0, ("[pmc,%d] pp %p already in pmc %p targets",
                        __LINE__, pp, pm));
#endif

        MALLOC(pt, struct pmc_target *, sizeof(struct pmc_target),
            M_PMC, M_ZERO|M_WAITOK);

        pt->pt_process = pp;

        LIST_INSERT_HEAD(&pm->pm_targets, pt, pt_next);

        atomic_store_rel_ptr(&pp->pp_pmcs[ri].pp_pmc, pm);

        if (pm->pm_owner->po_owner == pp->pp_proc)
                pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;

        pp->pp_refcnt++;

}

/*
 * Removes the association between a target process and a PMC.
 */

static void
pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
{
        int ri;
        struct pmc_target *ptgt;

        sx_assert(&pmc_sx, SX_XLOCKED);

        KASSERT(pm != NULL && pp != NULL,
            ("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));

        KASSERT(pp->pp_refcnt >= 1 && pp->pp_refcnt < (int) md->pmd_npmc,
            ("[pmc,%d] Illegal ref count %d on process record %p",
                __LINE__, pp->pp_refcnt, (void *) pp));

        ri = PMC_TO_ROWINDEX(pm);

        PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p",
            pm, ri, pp);

        KASSERT(pp->pp_pmcs[ri].pp_pmc == pm,
            ("[pmc,%d] PMC ri %d mismatch pmc %p pp->[ri] %p", __LINE__,
                ri, pm, pp->pp_pmcs[ri].pp_pmc));

        pp->pp_pmcs[ri].pp_pmc = NULL;
        pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;

        /* Remove owner-specific flags */
        if (pm->pm_owner->po_owner == pp->pp_proc) {
                pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
                pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER;
        }

        pp->pp_refcnt--;

        /* Remove the target process from the PMC structure */
        LIST_FOREACH(ptgt, &pm->pm_targets, pt_next)
                if (ptgt->pt_process == pp)
                        break;

        KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found "
            "in pmc %p", __LINE__, pp->pp_proc, pp, pm));

        LIST_REMOVE(ptgt, pt_next);
        FREE(ptgt, M_PMC);
}

/*
 * Remove PMC descriptor 'pmc' from the owner descriptor.
 */

void
pmc_unlink_owner(struct pmc *pm)
{
        struct pmc_list *pl, *tmp;
        struct pmc_owner *po;

#if     DEBUG
        KASSERT(LIST_EMPTY(&pm->pm_targets),
            ("[pmc,%d] unlinking PMC with targets", __LINE__));
#endif

        po = pm->pm_owner;

        KASSERT(po != NULL, ("[pmc,%d] No owner for PMC", __LINE__));

        LIST_FOREACH_SAFE(pl, &po->po_pmcs, pl_next, tmp) {
                if (pl->pl_pmc == pm) {
                        pl->pl_pmc   = NULL;
                        pm->pm_owner = NULL;
                        return;
                }
        }

        KASSERT(0, ("[pmc,%d] couldn't find pmc in owner list", __LINE__));
}
/*
 * Check if PMC 'pm' may be attached to target process 't'.
 */

static int
pmc_can_attach(struct pmc *pm, struct proc *t)
{
        struct proc *o;         /* pmc owner */
        struct ucred *oc, *tc;  /* owner, target credentials */
        int decline_attach, i;

        /*
         * A PMC's owner can always attach that PMC to itself.
         */

        if ((o = pm->pm_owner->po_owner) == t)
                return 0;

        PROC_LOCK(o);
        oc = o->p_ucred;
        crhold(oc);
        PROC_UNLOCK(o);

        PROC_LOCK(t);
        tc = t->p_ucred;
        crhold(tc);
        PROC_UNLOCK(t);

        /*
         * The effective uid of the PMC owner should match at least one
         * of the {effective,real,saved} uids of the target process.
         */

        decline_attach = oc->cr_uid != tc->cr_uid &&
            oc->cr_uid != tc->cr_svuid &&
            oc->cr_uid != tc->cr_ruid;

        /*
         * Every one of the target's group ids must be in the owner's
         * group list.
         */
        for (i = 0; !decline_attach && i < tc->cr_ngroups; i++)
                decline_attach = !groupmember(tc->cr_groups[i], oc);

        /* check the real and saved gids too */
        if (decline_attach == 0)
                decline_attach = !groupmember(tc->cr_rgid, oc) ||
                    !groupmember(tc->cr_svgid, oc);

        crfree(tc);
        crfree(oc);

        /* return 0 if the attach is permitted, an error otherwise */
        return (decline_attach ? EPERM : 0);
}
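/*
 * Worked example (added for illustration): suppose the PMC owner runs
 * with euid 1001 and the target process has euid/ruid/svuid
 * 1001/1001/0.  The uid test passes, since the owner's euid matches at
 * least one of the target's three uids.  The attach is still declined
 * unless every group of the target -- including its real and saved
 * gids -- appears in the owner's group list, so a target that retains
 * a privileged saved gid cannot be attached to by an unprivileged
 * owner.
 */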
/*
 * Attach a process to a PMC.
 */

static int
pmc_attach_one_process(struct proc *p, struct pmc *pm)
{
        int ri;
        struct pmc_process *pp;

        sx_assert(&pmc_sx, SX_XLOCKED);

        PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm,
            PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);

        /*
         * Locate the process descriptor corresponding to process 'p',
         * allocating space as needed.
         *
         * Verify that rowindex 'pm_rowindex' is free in the process
         * descriptor.
         *
         * If not, allocate space for a descriptor and link the
         * process descriptor and PMC.
         */

        ri = PMC_TO_ROWINDEX(pm);

        if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
                return ENOMEM;

        if (pp->pp_pmcs[ri].pp_pmc == pm) /* already present at slot [ri] */
                return EEXIST;

        if (pp->pp_pmcs[ri].pp_pmc != NULL)
                return EBUSY;

        pmc_link_target_process(pm, pp);

        /* mark process as using HWPMCs */
        PROC_LOCK(p);
        p->p_flag |= P_HWPMC;
        PROC_UNLOCK(p);

        return 0;
}

/*
 * Attach a process and optionally its children
 */

static int
pmc_attach_process(struct proc *p, struct pmc *pm)
{
        int error;
        struct proc *top;

        sx_assert(&pmc_sx, SX_XLOCKED);

        PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm,
            PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);

        /*
         * If this PMC successfully allowed a GETMSR operation
         * in the past, disallow further ATTACHes.
         */

        if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0)
                return EPERM;

        if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
                return pmc_attach_one_process(p, pm);

        /*
         * Traverse all child processes, attaching them to
         * this PMC.
         */

        sx_slock(&proctree_lock);

        top = p;

        for (;;) {
                if ((error = pmc_attach_one_process(p, pm)) != 0)
                        break;
                if (!LIST_EMPTY(&p->p_children))
                        p = LIST_FIRST(&p->p_children);
                else for (;;) {
                        if (p == top)
                                goto done;
                        if (LIST_NEXT(p, p_sibling)) {
                                p = LIST_NEXT(p, p_sibling);
                                break;
                        }
                        p = p->p_pptr;
                }
        }

        if (error)
                (void) pmc_detach_process(top, pm);

 done:
        sx_sunlock(&proctree_lock);
        return error;
}
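/*
 * Illustrative note (added): the loop above is an iterative preorder
 * walk of the process tree rooted at 'top'.  From each node it first
 * descends to the first child; when a subtree is exhausted it moves to
 * the next sibling, walking back up through p_pptr as needed, and it
 * terminates when the walk returns to 'top'.  Holding 'proctree_lock'
 * shared keeps the p_children/p_sibling links stable during the walk.
 * pmc_detach_process() below uses the same traversal shape.
 */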
/*
 * Detach a process from a PMC.  If there are no other PMCs tracking
 * this process, remove the process structure from its hash table.  If
 * 'flags' contains PMC_FLAG_REMOVE, then free the process structure.
 */

static int
pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
{
        int ri;
        struct pmc_process *pp;

        sx_assert(&pmc_sx, SX_XLOCKED);

        KASSERT(pm != NULL,
            ("[pmc,%d] null pm pointer", __LINE__));

        ri = PMC_TO_ROWINDEX(pm);

        PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
            pm, ri, p, p->p_pid, p->p_comm, flags);

        if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
                return ESRCH;

        if (pp->pp_pmcs[ri].pp_pmc != pm)
                return EINVAL;

        pmc_unlink_target_process(pm, pp);

        /*
         * If there are no PMCs targeting this process, we remove its
         * descriptor from the target hash table and unset the P_HWPMC
         * flag in the struct proc.
         */

        KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < (int) md->pmd_npmc,
            ("[pmc,%d] Illegal refcnt %d for process struct %p",
                __LINE__, pp->pp_refcnt, pp));

        if (pp->pp_refcnt != 0) /* still a target of some PMC */
                return 0;

        pmc_remove_process_descriptor(pp);

        if (flags & PMC_FLAG_REMOVE)
                FREE(pp, M_PMC);

        PROC_LOCK(p);
        p->p_flag &= ~P_HWPMC;
        PROC_UNLOCK(p);

        return 0;
}

/*
 * Detach a process and optionally its descendants from a PMC.
 */

static int
pmc_detach_process(struct proc *p, struct pmc *pm)
{
        struct proc *top;

        sx_assert(&pmc_sx, SX_XLOCKED);

        PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm,
            PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);

        if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
                return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);

        /*
         * Traverse all children, detaching them from this PMC.  We
         * ignore errors since we could be detaching a PMC from a
         * partially attached proc tree.
         */

        sx_slock(&proctree_lock);

        top = p;

        for (;;) {
                (void) pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);

                if (!LIST_EMPTY(&p->p_children))
                        p = LIST_FIRST(&p->p_children);
                else for (;;) {
                        if (p == top)
                                goto done;
                        if (LIST_NEXT(p, p_sibling)) {
                                p = LIST_NEXT(p, p_sibling);
                                break;
                        }
                        p = p->p_pptr;
                }
        }

 done:
        sx_sunlock(&proctree_lock);
        return 0;
}

/*
 * The 'hook' invoked from the kernel proper
 */

#if     DEBUG
const char *pmc_hooknames[] = {
        "",
        "EXIT",
        "EXEC",
        "FORK",
        "CSW-IN",
        "CSW-OUT"
};
#endif
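/*
 * Illustrative sketch (added; the exact macros live in <sys/pmckern.h>
 * and are reproduced here from memory, so treat the names as
 * approximate): the kernel proper reaches the handler below through
 * the global 'pmc_hook' function pointer, roughly as follows.
 */
#if 0
        /* in the kernel proper, e.g. at process exit */
        if (PMC_PROC_IS_USING_PMCS(p))
                PMC_CALL_HOOK(td, PMC_FN_PROCESS_EXIT, (void *) p);

        /* which, with the module loaded, ends up invoking */
        pmc_hook_handler(td, PMC_FN_PROCESS_EXIT, (void *) p);
#endif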
static int
pmc_hook_handler(struct thread *td, int function, void *arg)
{

        KASSERT(td->td_proc->p_flag & P_HWPMC,
            ("[pmc,%d] unregistered thread called pmc_hook()", __LINE__));

        PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
            pmc_hooknames[function], arg);

        switch (function)
        {

        /*
         * Process exit.
         *
         * Remove this process from all hash tables.  If this process
         * owned any PMCs, turn off those PMCs and deallocate them,
         * removing any associations with target processes.
         *
         * This function will be called by the last 'thread' of a
         * process.
         *
         */

        case PMC_FN_PROCESS_EXIT: /* release PMCs */
        {
                int cpu;
                unsigned int ri;
                struct pmc *pm;
                struct pmc_process *pp;
                struct pmc_owner *po;
                struct proc *p;
                pmc_value_t newvalue, tmp;

                sx_assert(&pmc_sx, SX_XLOCKED);

                p = (struct proc *) arg;

                /*
                 * Since this code is invoked by the last thread in an
                 * exiting process, we would have context switched IN
                 * at some prior point.  Kernel mode context switches
                 * may happen any time, so we want to disable a context
                 * switch OUT till we get any PMCs targeting this
                 * process off the hardware.
                 *
                 * We also need to atomically remove this process'
                 * entry from our target process hash table, using
                 * PMC_FLAG_REMOVE.
                 */

                PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid,
                    p->p_comm);

                critical_enter(); /* no preemption */

                cpu = curthread->td_oncpu;

                if ((pp = pmc_find_process_descriptor(p,
                         PMC_FLAG_REMOVE)) != NULL) {

                        PMCDBG(PRC,EXT,2,
                            "process-exit proc=%p pmc-process=%p", p, pp);

                        /*
                         * The exiting process could be the target of
                         * some PMCs, which may be running on the
                         * currently executing CPU.
                         *
                         * We need to turn these PMCs off like we
                         * would do at context switch OUT time.
                         */

                        for (ri = 0; ri < md->pmd_npmc; ri++) {

                                /*
                                 * Pick up the pmc pointer from hardware
                                 * state similar to the CSW_OUT code.
                                 */

                                pm = NULL;
                                (void) (*md->pmd_get_config)(cpu, ri, &pm);

                                PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);

                                if (pm == NULL ||
                                    !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
                                        continue;

                                PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
                                    "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
                                    pm, pm->pm_state);

                                KASSERT(PMC_TO_ROWINDEX(pm) == ri,
                                    ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
                                        __LINE__, PMC_TO_ROWINDEX(pm), ri));

                                KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
                                    ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
                                        __LINE__, pm, ri,
                                        pp->pp_pmcs[ri].pp_pmc));

                                (void) md->pmd_stop_pmc(cpu, ri);

                                KASSERT(pm->pm_runcount > 0,
                                    ("[pmc,%d] bad runcount ri %d rc %d",
                                        __LINE__, ri, pm->pm_runcount));

                                if (pm->pm_state == PMC_STATE_RUNNING) {
                                        md->pmd_read_pmc(cpu, ri, &newvalue);
                                        tmp = newvalue -
                                            PMC_PCPU_SAVED(cpu,ri);

                                        mtx_pool_lock_spin(pmc_mtxpool, pm);
                                        pm->pm_gv.pm_savedvalue += tmp;
                                        pp->pp_pmcs[ri].pp_pmcval += tmp;
                                        mtx_pool_unlock_spin(pmc_mtxpool, pm);
                                }

                                atomic_subtract_rel_32(&pm->pm_runcount,1);

                                KASSERT((int) pm->pm_runcount >= 0,
                                    ("[pmc,%d] runcount is %d", __LINE__,
                                        pm->pm_runcount));

                                (void) md->pmd_config_pmc(cpu, ri, NULL);
                        }

                        /*
                         * Inform the MD layer of this pseudo "context switch
                         * out"
                         */

                        (void) md->pmd_switch_out(pmc_pcpu[cpu], pp);

                        critical_exit(); /* ok to be pre-empted now */

                        /*
                         * Unlink this process from the PMCs that are
                         * targeting it.  Log value at exit() time if
                         * requested.
                         */

                        for (ri = 0; ri < md->pmd_npmc; ri++)
                                if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
                                        if (pm->pm_flags &
                                            PMC_F_LOG_TC_PROCEXIT)
                                                pmc_log_process_exit(pm, pp);
                                        pmc_unlink_target_process(pm, pp);
                                }

                        FREE(pp, M_PMC);

                } else
                        critical_exit(); /* pp == NULL */

                /*
                 * If the process owned PMCs, free them up and free up
                 * memory.
                 */

                if ((po = pmc_find_owner_descriptor(p)) != NULL) {
                        pmc_remove_owner(po);
                        FREE(po, M_PMC);
                }

        }
        break;

        /*
         * Process exec()
         */

        case PMC_FN_PROCESS_EXEC:
        {
                int *credentials_changed;
                unsigned int ri;
                struct pmc *pm;
                struct proc *p;
                struct pmc_owner *po;
                struct pmc_process *pp;

                sx_assert(&pmc_sx, SX_XLOCKED);

                /*
                 * PMCs are not inherited across an exec(): remove any
                 * PMCs that this process is the owner of.
                 */

                p = td->td_proc;

                if ((po = pmc_find_owner_descriptor(p)) != NULL) {
                        pmc_remove_owner(po);
                        FREE(po, M_PMC);
                }

                /*
                 * If this process is the target of a PMC, check if the new
                 * credentials are compatible with the owner's permissions.
                 */

                if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
                        break;

                credentials_changed = arg;

                PMCDBG(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d",
                    p, p->p_pid, p->p_comm, *credentials_changed);

                if (*credentials_changed == 0) /* credentials didn't change */
                        break;

                /*
                 * If the newly exec()'ed process has a different credential
                 * than before, allow it to be the target of a PMC only if
                 * the PMC's owner has sufficient privilege.
                 */
                for (ri = 0; ri < md->pmd_npmc; ri++)
                        if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL)
                                if (pmc_can_attach(pm, td->td_proc) != 0)
                                        pmc_detach_one_process(td->td_proc,
                                            pm, PMC_FLAG_NONE);

                KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < (int) md->pmd_npmc,
                    ("[pmc,%d] Illegal ref count %d on pp %p", __LINE__,
                        pp->pp_refcnt, pp));

                /*
                 * If this process is no longer the target of any
                 * PMCs, we can remove the process entry and free
                 * up space.
                 */

                if (pp->pp_refcnt == 0) {
                        pmc_remove_process_descriptor(pp);
                        FREE(pp, M_PMC);
                }
        }
        break;

        /*
         * Process fork()
         */

        case PMC_FN_PROCESS_FORK:
        {
                unsigned int ri;
                uint32_t do_descendants;
                struct pmc *pm;
                struct pmc_process *ppnew, *ppold;
                struct proc *newproc;

                sx_assert(&pmc_sx, SX_XLOCKED);

                newproc = (struct proc *) arg;

                PMCDBG(PMC,FRK,2, "process-fork p1=%p p2=%p",
                    curthread->td_proc, newproc);

                /*
                 * If the parent process (curthread->td_proc) is a
                 * target of any PMCs, look for PMCs that are to be
                 * inherited, and link these into the new process
                 * descriptor.
                 */

                if ((ppold = pmc_find_process_descriptor(
                    curthread->td_proc, PMC_FLAG_NONE)) == NULL)
                        break;

                do_descendants = 0;
                for (ri = 0; ri < md->pmd_npmc; ri++)
                        if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL)
                                do_descendants |=
                                    pm->pm_flags & PMC_F_DESCENDANTS;
                if (do_descendants == 0) /* nothing to do */
                        break;

                if ((ppnew = pmc_find_process_descriptor(newproc,
                    PMC_FLAG_ALLOCATE)) == NULL)
                        return ENOMEM;

                /*
                 * Run through all PMCs targeting the old process and
                 * attach them to the new process.
                 */

                for (ri = 0; ri < md->pmd_npmc; ri++)
                        if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL &&
                            pm->pm_flags & PMC_F_DESCENDANTS)
                                pmc_link_target_process(pm, ppnew);

                /*
                 * Now mark the new process as being tracked by this
                 * driver.
                 */
                PROC_LOCK(newproc);
                newproc->p_flag |= P_HWPMC;
                PROC_UNLOCK(newproc);

        }
        break;

        /*
         * Thread context switch IN
         */

        case PMC_FN_CSW_IN:
        {
                int cpu;
                unsigned int ri;
                struct pmc *pm;
                struct proc *p;
                struct pmc_cpu *pc;
                struct pmc_hw *phw;
                struct pmc_process *pp;
                pmc_value_t newvalue;

                p = td->td_proc;

                if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
                        break;

                KASSERT(pp->pp_proc == td->td_proc,
                    ("[pmc,%d] not my thread state", __LINE__));

                critical_enter(); /* no preemption on this CPU */

                cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */

                PMCDBG(CTX,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
                    p->p_pid, p->p_comm, pp);

                KASSERT(cpu >= 0 && cpu < mp_ncpus,
                    ("[pmc,%d] weird CPU id %d", __LINE__, cpu));

                pc = pmc_pcpu[cpu];

                for (ri = 0; ri < md->pmd_npmc; ri++) {

                        if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
                                continue;

                        KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
                            ("[pmc,%d] Target PMC in non-virtual mode (%d)",
                                __LINE__, PMC_TO_MODE(pm)));

                        KASSERT(PMC_TO_ROWINDEX(pm) == ri,
                            ("[pmc,%d] Row index mismatch pmc %d != ri %d",
                                __LINE__, PMC_TO_ROWINDEX(pm), ri));

                        /*
                         * Only PMCs that are marked as 'RUNNING' need
                         * be placed on hardware.
                         */

                        if (pm->pm_state != PMC_STATE_RUNNING)
                                continue;

                        /* increment PMC runcount */
                        atomic_add_rel_32(&pm->pm_runcount, 1);

                        /* configure the HWPMC we are going to use. */
                        md->pmd_config_pmc(cpu, ri, pm);

                        phw = pc->pc_hwpmcs[ri];

                        KASSERT(phw != NULL,
                            ("[pmc,%d] null hw pointer", __LINE__));

                        KASSERT(phw->phw_pmc == pm,
                            ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__,
                                phw->phw_pmc, pm));

                        /* write out saved value and start the PMC */
                        mtx_pool_lock_spin(pmc_mtxpool, pm);
                        newvalue = PMC_PCPU_SAVED(cpu, ri) =
                            pm->pm_gv.pm_savedvalue;
                        mtx_pool_unlock_spin(pmc_mtxpool, pm);

                        md->pmd_write_pmc(cpu, ri, newvalue);
                        md->pmd_start_pmc(cpu, ri);

                }

                /*
                 * perform any other architecture/cpu dependent thread
                 * switch-in actions.
                 */

                (void) (*md->pmd_switch_in)(pc, pp);

                critical_exit();

        }
        break;

        /*
         * Thread context switch OUT.
         */

        case PMC_FN_CSW_OUT:
        {
                int cpu;
                unsigned int ri;
                struct pmc *pm;
                struct proc *p;
                struct pmc_cpu *pc;
                struct pmc_process *pp;
                pmc_value_t newvalue, tmp;

                /*
                 * Locate our process descriptor; this may be NULL if
                 * this process is exiting and we have already removed
                 * the process from the target process table.
                 *
                 * Note that due to kernel preemption, multiple
                 * context switches may happen while the process is
                 * exiting.
                 *
                 * Note also that if the target process cannot be
                 * found we still need to deconfigure any PMCs that
                 * are currently running on hardware.
                 */
                p = td->td_proc;
                pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);

                /*
                 * save PMCs
                 */

                critical_enter();

                cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */

                PMCDBG(CTX,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
                    p->p_pid, p->p_comm, pp);

                KASSERT(cpu >= 0 && cpu < mp_ncpus,
                    ("[pmc,%d] weird CPU id %d", __LINE__, cpu));

                pc = pmc_pcpu[cpu];

                /*
                 * When a PMC gets unlinked from a target process, it
                 * will be removed from the target's pp_pmcs[] array.
                 *
                 * However, on a MP system, the target could have been
                 * executing on another CPU at the time of the unlink.
                 * So, at context switch OUT time, we need to look at
                 * the hardware to determine if a PMC is scheduled on
                 * it.
                 */

                for (ri = 0; ri < md->pmd_npmc; ri++) {

                        pm = NULL;
                        (void) (*md->pmd_get_config)(cpu, ri, &pm);

                        if (pm == NULL) /* nothing at this row index */
                                continue;

                        if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
                                continue; /* not a process virtual PMC */

                        KASSERT(PMC_TO_ROWINDEX(pm) == ri,
                            ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
                                __LINE__, PMC_TO_ROWINDEX(pm), ri));

                        /* Stop hardware */
                        md->pmd_stop_pmc(cpu, ri);

                        /* reduce this PMC's runcount */
                        atomic_subtract_rel_32(&pm->pm_runcount, 1);

                        /*
                         * If this PMC is associated with this process,
                         * save the reading.
                         */

                        if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) {

                                KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
                                    ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
                                        __LINE__, pm, ri,
                                        pp->pp_pmcs[ri].pp_pmc));

                                KASSERT(pp->pp_refcnt > 0,
                                    ("[pmc,%d] pp refcnt = %d", __LINE__,
                                        pp->pp_refcnt));

                                md->pmd_read_pmc(cpu, ri, &newvalue);

                                tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);

                                KASSERT((int64_t) tmp >= 0,
                                    ("[pmc,%d] negative increment cpu=%d "
                                        "ri=%d newvalue=%jx saved=%jx "
                                        "incr=%jx", __LINE__, cpu, ri,
                                        newvalue, PMC_PCPU_SAVED(cpu,ri),
                                        tmp));

                                /*
                                 * Increment the PMC's count and this
                                 * target process's count by the difference
                                 * between the current reading and the
                                 * saved value at context switch in time.
                                 */

                                mtx_pool_lock_spin(pmc_mtxpool, pm);

                                pm->pm_gv.pm_savedvalue += tmp;
                                pp->pp_pmcs[ri].pp_pmcval += tmp;

                                mtx_pool_unlock_spin(pmc_mtxpool, pm);

                        }

                        /* mark hardware as free */
                        md->pmd_config_pmc(cpu, ri, NULL);
                }

                /*
                 * perform any other architecture/cpu dependent thread
                 * switch out functions.
                 */

                (void) (*md->pmd_switch_out)(pc, pp);

                critical_exit();

        }
        break;

        default:
#if     DEBUG
                KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
#endif
                break;

        }

        return 0;
}
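/*
 * Worked example (added for illustration): a virtual PMC's value is
 * accumulated across time slices.  When the PMC is switched in with
 * saved value S = pm_gv.pm_savedvalue, the hardware counter is written
 * with S and PMC_PCPU_SAVED(cpu,ri) records S.  If the hardware later
 * reads V at switch-out, the increment is
 *
 *      tmp = V - S
 *
 * which is added to both pm_gv.pm_savedvalue (the PMC-wide total) and
 * pp_pmcs[ri].pp_pmcval (the per-target total).  E.g. S = 1000 and
 * V = 1750 contribute 750 events to both totals for that slice.
 */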
/*
 * allocate a 'struct pmc_owner' descriptor in the owner hash table.
 */

static struct pmc_owner *
pmc_allocate_owner_descriptor(struct proc *p)
{
        uint32_t hindex;
        struct pmc_owner *po;
        struct pmc_ownerhash *poh;

        hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
        poh = &pmc_ownerhash[hindex];

        /* allocate space for N pointers and one descriptor struct */
        MALLOC(po, struct pmc_owner *, sizeof(struct pmc_owner),
            M_PMC, M_WAITOK);

        po->po_flags = 0;
        po->po_owner = p;
        LIST_INIT(&po->po_pmcs);
        LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */

        PMCDBG(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p",
            p, p->p_pid, p->p_comm, po);

        return po;
}

/*
 * find the descriptor corresponding to process 'p', adding or removing it
 * as specified by 'mode'.
 */

static struct pmc_process *
pmc_find_process_descriptor(struct proc *p, uint32_t mode)
{
        uint32_t hindex;
        struct pmc_process *pp, *ppnew;
        struct pmc_processhash *pph;

        hindex = PMC_HASH_PTR(p, pmc_processhashmask);
        pph = &pmc_processhash[hindex];

        ppnew = NULL;

        /*
         * Pre-allocate memory in the FIND_ALLOCATE case since we
         * cannot call malloc(9) once we hold a spin lock.
         */

        if (mode & PMC_FLAG_ALLOCATE) {
                /* allocate additional space for 'n' pmc pointers */
                MALLOC(ppnew, struct pmc_process *,
                    sizeof(struct pmc_process) + md->pmd_npmc *
                    sizeof(struct pmc_targetstate), M_PMC, M_ZERO|M_WAITOK);
        }

        mtx_lock_spin(&pmc_processhash_mtx);
        LIST_FOREACH(pp, pph, pp_next)
            if (pp->pp_proc == p)
                    break;

        if ((mode & PMC_FLAG_REMOVE) && pp != NULL)
                LIST_REMOVE(pp, pp_next);

        if ((mode & PMC_FLAG_ALLOCATE) && pp == NULL &&
            ppnew != NULL) {
                ppnew->pp_proc = p;
                LIST_INSERT_HEAD(pph, ppnew, pp_next);
                pp = ppnew;
                ppnew = NULL;
        }
        mtx_unlock_spin(&pmc_processhash_mtx);

        if (pp != NULL && ppnew != NULL)
                FREE(ppnew, M_PMC);

        return pp;
}
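/*
 * Illustrative note (added): the function above uses a
 * preallocate-then-lock pattern.  malloc(9) may sleep, which is
 * forbidden while holding a spin mutex, so in the ALLOCATE case the new
 * descriptor is allocated up front.  Under the lock it is either linked
 * into the hash chain (and 'ppnew' cleared), or, if another descriptor
 * was found first, left over and freed after the lock is dropped.
 */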
/*
 * remove a process descriptor from the process hash table.
 */

static void
pmc_remove_process_descriptor(struct pmc_process *pp)
{
        KASSERT(pp->pp_refcnt == 0,
            ("[pmc,%d] Removing process descriptor %p with count %d",
                __LINE__, pp, pp->pp_refcnt));

        mtx_lock_spin(&pmc_processhash_mtx);
        LIST_REMOVE(pp, pp_next);
        mtx_unlock_spin(&pmc_processhash_mtx);
}

/*
 * find an owner descriptor corresponding to proc 'p'
 */

static struct pmc_owner *
pmc_find_owner_descriptor(struct proc *p)
{
        uint32_t hindex;
        struct pmc_owner *po;
        struct pmc_ownerhash *poh;

        hindex = PMC_HASH_PTR(p, pmc_ownerhashmask);
        poh = &pmc_ownerhash[hindex];

        po = NULL;
        LIST_FOREACH(po, poh, po_next)
            if (po->po_owner == p)
                    break;

        PMCDBG(OWN,FND,1, "find-owner proc=%p (%d, %s) hindex=0x%x -> "
            "pmc-owner=%p", p, p->p_pid, p->p_comm, hindex, po);

        return po;
}

/*
 * pmc_allocate_pmc_descriptor
 *
 * Allocate a pmc descriptor and initialize its
 * fields.
 */

static struct pmc *
pmc_allocate_pmc_descriptor(void)
{
        struct pmc *pmc;

        MALLOC(pmc, struct pmc *, sizeof(struct pmc), M_PMC, M_ZERO|M_WAITOK);

        if (pmc != NULL) {
                pmc->pm_owner = NULL;
                LIST_INIT(&pmc->pm_targets);
        }

        PMCDBG(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);

        return pmc;
}

/*
 * Destroy a pmc descriptor.
 */

static void
pmc_destroy_pmc_descriptor(struct pmc *pm)
{
        (void) pm;

#if     DEBUG
        KASSERT(pm->pm_state == PMC_STATE_DELETED ||
            pm->pm_state == PMC_STATE_FREE,
            ("[pmc,%d] destroying non-deleted PMC", __LINE__));
        KASSERT(LIST_EMPTY(&pm->pm_targets),
            ("[pmc,%d] destroying pmc with targets", __LINE__));
        KASSERT(pm->pm_owner == NULL,
            ("[pmc,%d] destroying pmc attached to an owner", __LINE__));
        KASSERT(pm->pm_runcount == 0,
            ("[pmc,%d] pmc has non-zero run count %d", __LINE__,
                pm->pm_runcount));
#endif
}

/*
 * This function does the following things:
 *
 *  - detaches the PMC from hardware
 *  - unlinks all target threads that were attached to it
 *  - removes the PMC from its owner's list
 *  - destroys the PMC's private mutex
 *
 * Once this function completes, the given pmc pointer can be safely
 * FREE'd by the caller.
 */

static void
pmc_release_pmc_descriptor(struct pmc *pm)
{
#if     DEBUG
        volatile int maxloop;
#endif
        u_int ri, cpu;
        enum pmc_mode mode;
        struct pmc_hw *phw;
        struct pmc_process *pp;
        struct pmc_target *ptgt, *tmp;
        struct pmc_binding pb;

        sx_assert(&pmc_sx, SX_XLOCKED);

        KASSERT(pm, ("[pmc,%d] null pmc", __LINE__));

        ri   = PMC_TO_ROWINDEX(pm);
        mode = PMC_TO_MODE(pm);

        PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri,
            mode);

        /*
         * First, we take the PMC off hardware.
         */
        cpu = 0;
        if (PMC_IS_SYSTEM_MODE(mode)) {

                /*
                 * A system mode PMC runs on a specific CPU.  Switch
                 * to this CPU and turn hardware off.
                 */

                pmc_save_cpu_binding(&pb);

                cpu = PMC_TO_CPU(pm);

                if (pm->pm_state == PMC_STATE_RUNNING) {

                        pmc_select_cpu(cpu);

                        phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];

                        KASSERT(phw->phw_pmc == pm,
                            ("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)",
                                __LINE__, ri, phw->phw_pmc, pm));

                        PMCDBG(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri);

                        critical_enter();
                        md->pmd_stop_pmc(cpu, ri);
                        critical_exit();
                }

                PMCDBG(PMC,REL,2, "decfg cpu=%d ri=%d", cpu, ri);

                critical_enter();
                md->pmd_config_pmc(cpu, ri, NULL);
                critical_exit();

                pm->pm_state = PMC_STATE_DELETED;

                pmc_restore_cpu_binding(&pb);

        } else if (PMC_IS_VIRTUAL_MODE(mode)) {

                /*
                 * A virtual PMC could be running on multiple CPUs at
                 * a given instant.
                 *
                 * By marking its state as DELETED, we ensure that
                 * this PMC is never further scheduled on hardware.
                 *
                 * Then we wait till all CPUs are done with this PMC.
                 */

                pm->pm_state = PMC_STATE_DELETED;

                /*
                 * Wait for the PMC's runcount to come to zero.
                 */
#if     DEBUG
                maxloop = 100 * mp_ncpus;
#endif

                while (atomic_load_acq_32(&pm->pm_runcount) > 0) {

#if     DEBUG
                        maxloop--;
                        KASSERT(maxloop > 0,
                            ("[pmc,%d] (ri%d, rc%d) waiting too long for "
                                "pmc to be free", __LINE__,
                                PMC_TO_ROWINDEX(pm), pm->pm_runcount));
#endif

                        pmc_force_context_switch();
                }

                /*
                 * At this point the PMC is off all CPUs and cannot be
                 * freshly scheduled onto a CPU.  It is now safe to
                 * unlink all targets from this PMC.  If a
                 * process-record's refcount falls to zero, we remove
                 * it from the hash table.  The module-wide SX lock
                 * protects us from races.
                 */

                LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) {
                        pp = ptgt->pt_process;
                        pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */

                        PMCDBG(PMC,REL,3, "pp->refcnt=%d", pp->pp_refcnt);

                        /*
                         * If the target process record shows that no
                         * PMCs are attached to it, reclaim its space.
                         */

                        if (pp->pp_refcnt == 0) {
                                pmc_remove_process_descriptor(pp);
                                FREE(pp, M_PMC);
                        }
                }

                cpu = curthread->td_oncpu; /* setup cpu for pmd_release() */

        }

        /*
         * Release any MD resources
         */

        (void) md->pmd_release_pmc(cpu, ri, pm);

        /*
         * Update row disposition
         */

        if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm)))
                PMC_UNMARK_ROW_STANDALONE(ri);
        else
                PMC_UNMARK_ROW_THREAD(ri);

        /* unlink from the owner's list */
        if (pm->pm_owner)
                pmc_unlink_owner(pm);

        pmc_destroy_pmc_descriptor(pm);
}

/*
 * Register an owner and a pmc.
 */

static int
pmc_register_owner(struct proc *p, struct pmc *pmc)
{
        struct pmc_list *pl;
        struct pmc_owner *po;

        sx_assert(&pmc_sx, SX_XLOCKED);

        MALLOC(pl, struct pmc_list *, sizeof(struct pmc_list), M_PMC,
            M_WAITOK);

        if (pl == NULL)
                return ENOMEM;

        if ((po = pmc_find_owner_descriptor(p)) == NULL)
                if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
                        FREE(pl, M_PMC);
                        return ENOMEM;
                }

        /* XXX is this too restrictive */
        if (PMC_ID_TO_MODE(pmc->pm_id) == PMC_MODE_TS) {
                /* can have only one TS mode PMC per process */
                if (po->po_flags & PMC_PO_HAS_TS_PMC) {
                        FREE(pl, M_PMC);
                        return EINVAL;
                }
                po->po_flags |= PMC_PO_HAS_TS_PMC;
        }

        KASSERT(pmc->pm_owner == NULL,
            ("[pmc,%d] attempting to own an initialized PMC", __LINE__));
        pmc->pm_owner = po;

        pl->pl_pmc = pmc;

        LIST_INSERT_HEAD(&po->po_pmcs, pl, pl_next);

        PROC_LOCK(p);
        p->p_flag |= P_HWPMC;
        PROC_UNLOCK(p);

        PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pl=%p pmc=%p",
            po, pl, pmc);

        return 0;
}

/*
 * Return the current row disposition:
 * == 0 => FREE
 *  > 0 => PROCESS MODE
 *  < 0 => SYSTEM MODE
 */

int
pmc_getrowdisp(int ri)
{
        return pmc_pmcdisp[ri];
}

/*
 * Check if a PMC at row index 'ri' can be allocated to the current
 * process.
 *
 * Allocation can fail if:
 *   - the current process is already being profiled by a PMC at index 'ri',
 *     attached to it via OP_PMCATTACH.
 *   - the current process has already allocated a PMC at index 'ri'
 *     via OP_ALLOCATE.
 */
2092 */ 2093 2094 static int 2095 pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu) 2096 { 2097 enum pmc_mode mode; 2098 struct pmc *pm; 2099 struct pmc_list *pl; 2100 struct pmc_owner *po; 2101 struct pmc_process *pp; 2102 2103 PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d " 2104 "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu); 2105 2106 /* 2107 * We shouldn't have already allocated a process-mode PMC at 2108 * row index 'ri'. 2109 * 2110 * We shouldn't have allocated a system-wide PMC on the same 2111 * CPU and same RI. 2112 */ 2113 if ((po = pmc_find_owner_descriptor(p)) != NULL) 2114 LIST_FOREACH(pl, &po->po_pmcs, pl_next) { 2115 pm = pl->pl_pmc; 2116 if (PMC_TO_ROWINDEX(pm) == ri) { 2117 mode = PMC_TO_MODE(pm); 2118 if (PMC_IS_VIRTUAL_MODE(mode)) 2119 return EEXIST; 2120 if (PMC_IS_SYSTEM_MODE(mode) && 2121 (int) PMC_TO_CPU(pm) == cpu) 2122 return EEXIST; 2123 } 2124 } 2125 2126 /* 2127 * We also shouldn't be the target of any PMC at this index 2128 * since otherwise a PMC_ATTACH to ourselves will fail. 2129 */ 2130 if ((pp = pmc_find_process_descriptor(p, 0)) != NULL) 2131 if (pp->pp_pmcs[ri].pp_pmc) 2132 return EEXIST; 2133 2134 PMCDBG(PMC,ALR,2, "can-allocate-rowindex proc=%p (%d, %s) ri=%d ok", 2135 p, p->p_pid, p->p_comm, ri); 2136 2137 return 0; 2138 } 2139 2140 /* 2141 * Check if a given PMC at row index 'ri' can currently be used in 2142 * mode 'mode'. 2143 */ 2144 2145 static int 2146 pmc_can_allocate_row(int ri, enum pmc_mode mode) 2147 { 2148 enum pmc_disp disp; 2149 2150 sx_assert(&pmc_sx, SX_XLOCKED); 2151 2152 PMCDBG(PMC,ALR,1, "can-allocate-row ri=%d mode=%d", ri, mode); 2153 2154 if (PMC_IS_SYSTEM_MODE(mode)) 2155 disp = PMC_DISP_STANDALONE; 2156 else 2157 disp = PMC_DISP_THREAD; 2158 2159 /* 2160 * check disposition for PMC row 'ri': 2161 * 2162 * Expected disposition Row-disposition Result 2163 * 2164 * STANDALONE STANDALONE or FREE proceed 2165 * STANDALONE THREAD fail 2166 * THREAD THREAD or FREE proceed 2167 * THREAD STANDALONE fail 2168 */ 2169 2170 if (!PMC_ROW_DISP_IS_FREE(ri) && 2171 !(disp == PMC_DISP_THREAD && PMC_ROW_DISP_IS_THREAD(ri)) && 2172 !(disp == PMC_DISP_STANDALONE && PMC_ROW_DISP_IS_STANDALONE(ri))) 2173 return EBUSY; 2174 2175 /* 2176 * All OK 2177 */ 2178 2179 PMCDBG(PMC,ALR,2, "can-allocate-row ri=%d mode=%d ok", ri, mode); 2180 2181 return 0; 2182 2183 } 2184 2185 /* 2186 * Find the PMC descriptor with user handle 'pmcid' among the PMCs owned by 'po'. 2187 */ 2188 2189 static struct pmc * 2190 pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid) 2191 { 2192 struct pmc_list *pl; 2193 2194 KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc, 2195 ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, 2196 PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc)); 2197 2198 LIST_FOREACH(pl, &po->po_pmcs, pl_next) 2199 if (pl->pl_pmc->pm_id == pmcid) 2200 return pl->pl_pmc; 2201 2202 return NULL; 2203 } 2204 2205 static int 2206 pmc_find_pmc(pmc_id_t pmcid, struct pmc **pmc) 2207 { 2208 2209 struct pmc *pm; 2210 struct pmc_owner *po; 2211 2212 PMCDBG(PMC,FND,1, "find-pmc id=%d", pmcid); 2213 2214 if ((po = pmc_find_owner_descriptor(curthread->td_proc)) == NULL) 2215 return ESRCH; 2216 2217 if ((pm = pmc_find_pmc_descriptor_in_process(po, pmcid)) == NULL) 2218 return EINVAL; 2219 2220 PMCDBG(PMC,FND,2, "find-pmc id=%d -> pmc=%p", pmcid, pm); 2221 2222 *pmc = pm; 2223 return 0; 2224 } 2225 2226 /* 2227 * Start a PMC.
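 *
 * This is reached from the PMC_OP_PMCSTART case of the syscall handler
 * below; a minimal (hypothetical) consumer sequence, with error
 * checking omitted and 'ctrspec' illustrative, would be:
 *
 *	pmc_id_t id;
 *	pmc_allocate(ctrspec, PMC_MODE_TC, 0, PMC_CPU_ANY, &id);
 *	pmc_start(id);		userland libpmc call, not this function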
2228 */ 2229 2230 static int 2231 pmc_start(struct pmc *pm) 2232 { 2233 int error, cpu, ri; 2234 enum pmc_mode mode; 2235 struct pmc_binding pb; 2236 2237 KASSERT(pm != NULL, 2238 ("[pmc,%d] null pm", __LINE__)); 2239 2240 mode = PMC_TO_MODE(pm); 2241 ri = PMC_TO_ROWINDEX(pm); 2242 error = 0; 2243 2244 PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri); 2245 2246 pm->pm_state = PMC_STATE_RUNNING; 2247 2248 if (PMC_IS_VIRTUAL_MODE(mode)) { 2249 2250 /* 2251 * If a PMCATTACH has not been done on this 2252 * PMC, attach this PMC to its owner process. 2253 */ 2254 2255 if (LIST_EMPTY(&pm->pm_targets)) 2256 error = pmc_attach_process(pm->pm_owner->po_owner, pm); 2257 2258 /* 2259 * If the PMC is attached to its owner, then force a context 2260 * switch to ensure that the MD state gets set correctly. 2261 */ 2262 if (error == 0 && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER)) 2263 pmc_force_context_switch(); 2264 2265 /* 2266 * Nothing further to be done; thread context switch code 2267 * will start/stop the hardware as appropriate. 2268 */ 2269 2270 return error; 2271 2272 } 2273 2274 /* 2275 * A system-wide PMC. Move to the CPU associated with this 2276 * PMC, and start the hardware. 2277 */ 2278 2279 pmc_save_cpu_binding(&pb); 2280 2281 cpu = PMC_TO_CPU(pm); 2282 2283 if (pmc_cpu_is_disabled(cpu)) 2284 return ENXIO; 2285 2286 pmc_select_cpu(cpu); 2287 2288 /* 2289 * global PMCs are configured at allocation time 2290 * so write out the initial value and start the PMC. 2291 */ 2292 2293 critical_enter(); 2294 if ((error = md->pmd_write_pmc(cpu, ri, 2295 PMC_IS_SAMPLING_MODE(mode) ? 2296 pm->pm_sc.pm_reloadcount : 2297 pm->pm_sc.pm_initial)) == 0) 2298 error = md->pmd_start_pmc(cpu, ri); 2299 critical_exit(); 2300 2301 pmc_restore_cpu_binding(&pb); 2302 2303 return error; 2304 } 2305 2306 /* 2307 * Stop a PMC. 2308 */ 2309 2310 static int 2311 pmc_stop(struct pmc *pm) 2312 { 2313 int cpu, error, ri; 2314 struct pmc_binding pb; 2315 2316 KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__)); 2317 2318 PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, 2319 PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm)); 2320 2321 pm->pm_state = PMC_STATE_STOPPED; 2322 2323 /* 2324 * If the PMC is a virtual mode one, changing the state to 2325 * non-RUNNING is enough to ensure that the PMC never gets 2326 * scheduled. 2327 * 2328 * If this PMC is currently running on a CPU, then it will be 2329 * handled correctly at the time its target process is context 2330 * switched out. 2331 */ 2332 2333 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) 2334 return 0; 2335 2336 /* 2337 * A system-mode PMC. Move to the CPU associated with 2338 * this PMC, and stop the hardware. We update the 2339 * 'initial count' so that a subsequent PMCSTART will 2340 * resume counting from the current hardware count.
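 *
 * That is, a stop/start pair is intended to round-trip the count; in
 * terms of the MD calls used below and in pmc_start() above:
 *
 *	pmd_read_pmc(cpu, ri, &pm->pm_sc.pm_initial);	at PMCSTOP
 *	pmd_write_pmc(cpu, ri, pm->pm_sc.pm_initial);	at a later PMCSTART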
2341 */ 2342 2343 pmc_save_cpu_binding(&pb); 2344 2345 cpu = PMC_TO_CPU(pm); 2346 2347 KASSERT(cpu >= 0 && cpu < mp_ncpus, 2348 ("[pmc,%d] illegal cpu=%d", __LINE__, cpu)); 2349 2350 if (pmc_cpu_is_disabled(cpu)) 2351 return ENXIO; 2352 2353 pmc_select_cpu(cpu); 2354 2355 ri = PMC_TO_ROWINDEX(pm); 2356 2357 critical_enter(); 2358 if ((error = md->pmd_stop_pmc(cpu, ri)) == 0) 2359 error = md->pmd_read_pmc(cpu, ri, &pm->pm_sc.pm_initial); 2360 critical_exit(); 2361 2362 pmc_restore_cpu_binding(&pb); 2363 2364 return error; 2365 } 2366 2367 2368 #if DEBUG 2369 static const char *pmc_op_to_name[] = { 2370 #undef __PMC_OP 2371 #define __PMC_OP(N, D) #N , 2372 __PMC_OPS() 2373 NULL 2374 }; 2375 #endif 2376 2377 /* 2378 * The syscall interface 2379 */ 2380 2381 #define PMC_GET_SX_XLOCK(...) do { \ 2382 sx_xlock(&pmc_sx); \ 2383 if (pmc_hook == NULL) { \ 2384 sx_xunlock(&pmc_sx); \ 2385 return __VA_ARGS__; \ 2386 } \ 2387 } while (0) 2388 2389 #define PMC_DOWNGRADE_SX() do { \ 2390 sx_downgrade(&pmc_sx); \ 2391 is_sx_downgraded = 1; \ 2392 } while (0) 2393 2394 static int 2395 pmc_syscall_handler(struct thread *td, void *syscall_args) 2396 { 2397 int error, is_sx_downgraded, op; 2398 struct pmc_syscall_args *c; 2399 void *arg; 2400 2401 PMC_GET_SX_XLOCK(ENOSYS); 2402 2403 is_sx_downgraded = 0; 2404 2405 c = (struct pmc_syscall_args *) syscall_args; 2406 2407 op = c->pmop_code; 2408 arg = c->pmop_data; 2409 2410 PMCDBG(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op, 2411 pmc_op_to_name[op], arg); 2412 2413 error = 0; 2414 atomic_add_int(&pmc_stats.pm_syscalls, 1); 2415 2416 switch(op) 2417 { 2418 2419 2420 /* 2421 * Configure a log file. 2422 * 2423 * XXX This OP will be reworked. 2424 */ 2425 2426 case PMC_OP_CONFIGURELOG: 2427 { 2428 struct pmc_owner *po; 2429 struct pmc_op_configurelog cl; 2430 struct proc *p; 2431 2432 sx_assert(&pmc_sx, SX_XLOCKED); 2433 2434 if ((error = copyin(arg, &cl, sizeof(cl))) != 0) 2435 break; 2436 2437 /* mark this process as owning a log file */ 2438 p = td->td_proc; 2439 if ((po = pmc_find_owner_descriptor(p)) == NULL) 2440 if ((po = pmc_allocate_owner_descriptor(p)) == NULL) { 2441 error = ENOMEM; break; /* a bare return here would leak pmc_sx */ } 2442 2443 if ((error = pmc_configure_log(po, cl.pm_logfd)) != 0) 2444 break; 2445 2446 } 2447 break; 2448 2449 2450 /* 2451 * Retrieve hardware configuration. 2452 */ 2453 2454 case PMC_OP_GETCPUINFO: /* CPU information */ 2455 { 2456 struct pmc_op_getcpuinfo gci; 2457 2458 gci.pm_cputype = md->pmd_cputype; 2459 gci.pm_ncpu = mp_ncpus; 2460 gci.pm_npmc = md->pmd_npmc; 2461 gci.pm_nclass = md->pmd_nclass; 2462 bcopy(md->pmd_classes, &gci.pm_classes, 2463 sizeof(gci.pm_classes)); 2464 error = copyout(&gci, arg, sizeof(gci)); 2465 } 2466 break; 2467 2468 2469 /* 2470 * Get module statistics 2471 */ 2472 2473 case PMC_OP_GETDRIVERSTATS: 2474 { 2475 struct pmc_op_getdriverstats gms; 2476 2477 bcopy(&pmc_stats, &gms, sizeof(gms)); 2478 error = copyout(&gms, arg, sizeof(gms)); 2479 } 2480 break; 2481 2482 2483 /* 2484 * Retrieve module version number 2485 */ 2486 2487 case PMC_OP_GETMODULEVERSION: 2488 { 2489 error = copyout(&_pmc_version.mv_version, arg, sizeof(int)); 2490 } 2491 break; 2492 2493 2494 /* 2495 * Retrieve the state of all the PMCs on a given 2496 * CPU.
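 *
 * The same structure is used for input and output: 'pm_cpu' carries
 * the CPU number in, and an array of 'struct pmc_info' comes back. A
 * caller would size its buffer as (sketch, assuming 'pm_pmcs' is the
 * trailing array of the request structure):
 *
 *	nbytes = sizeof(struct pmc_op_getpmcinfo) +
 *	    npmc * sizeof(struct pmc_info);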
2497 */ 2498 2499 case PMC_OP_GETPMCINFO: 2500 { 2501 uint32_t cpu, n, npmc; 2502 size_t pmcinfo_size; 2503 struct pmc *pm; 2504 struct pmc_info *p, *pmcinfo; 2505 struct pmc_op_getpmcinfo *gpi; 2506 struct pmc_owner *po; 2507 struct pmc_binding pb; 2508 2509 PMC_DOWNGRADE_SX(); 2510 2511 gpi = (struct pmc_op_getpmcinfo *) arg; 2512 2513 if ((error = copyin(&gpi->pm_cpu, &cpu, sizeof(cpu))) != 0) 2514 break; 2515 2516 if (cpu >= (unsigned int) mp_ncpus) { 2517 error = EINVAL; 2518 break; 2519 } 2520 2521 if (pmc_cpu_is_disabled(cpu)) { 2522 error = ENXIO; 2523 break; 2524 } 2525 2526 /* switch to CPU 'cpu' */ 2527 pmc_save_cpu_binding(&pb); 2528 pmc_select_cpu(cpu); 2529 2530 npmc = md->pmd_npmc; 2531 2532 pmcinfo_size = npmc * sizeof(struct pmc_info); 2533 MALLOC(pmcinfo, struct pmc_info *, pmcinfo_size, M_PMC, 2534 M_WAITOK); 2535 2536 p = pmcinfo; 2537 2538 for (n = 0; n < md->pmd_npmc; n++, p++) { 2539 2540 if ((error = md->pmd_describe(cpu, n, p, &pm)) != 0) 2541 break; 2542 2543 if (PMC_ROW_DISP_IS_STANDALONE(n)) 2544 p->pm_rowdisp = PMC_DISP_STANDALONE; 2545 else if (PMC_ROW_DISP_IS_THREAD(n)) 2546 p->pm_rowdisp = PMC_DISP_THREAD; 2547 else 2548 p->pm_rowdisp = PMC_DISP_FREE; 2549 2550 p->pm_ownerpid = -1; 2551 2552 if (pm == NULL) /* no PMC associated */ 2553 continue; 2554 2555 po = pm->pm_owner; 2556 2557 KASSERT(po->po_owner != NULL, 2558 ("[pmc,%d] pmc_owner had a null proc pointer", 2559 __LINE__)); 2560 2561 p->pm_ownerpid = po->po_owner->p_pid; 2562 p->pm_mode = PMC_TO_MODE(pm); 2563 p->pm_event = pm->pm_event; 2564 p->pm_flags = pm->pm_flags; 2565 2566 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 2567 p->pm_reloadcount = 2568 pm->pm_sc.pm_reloadcount; 2569 } 2570 2571 pmc_restore_cpu_binding(&pb); 2572 2573 /* now copy out the PMC info collected */ 2574 if (error == 0) 2575 error = copyout(pmcinfo, &gpi->pm_pmcs, pmcinfo_size); 2576 2577 FREE(pmcinfo, M_PMC); 2578 } 2579 break; 2580 2581 2582 /* 2583 * Set the administrative state of a PMC. I.e. whether 2584 * the PMC is to be used or not. 2585 */ 2586 2587 case PMC_OP_PMCADMIN: 2588 { 2589 int cpu, ri; 2590 enum pmc_state request; 2591 struct pmc_cpu *pc; 2592 struct pmc_hw *phw; 2593 struct pmc_op_pmcadmin pma; 2594 struct pmc_binding pb; 2595 2596 sx_assert(&pmc_sx, SX_XLOCKED); 2597 2598 KASSERT(td == curthread, 2599 ("[pmc,%d] td != curthread", __LINE__)); 2600 2601 if (suser(td) || jailed(td->td_ucred)) { 2602 error = EPERM; 2603 break; 2604 } 2605 2606 if ((error = copyin(arg, &pma, sizeof(pma))) != 0) 2607 break; 2608 2609 cpu = pma.pm_cpu; 2610 2611 if (cpu < 0 || cpu >= mp_ncpus) { 2612 error = EINVAL; 2613 break; 2614 } 2615 2616 if (pmc_cpu_is_disabled(cpu)) { 2617 error = ENXIO; 2618 break; 2619 } 2620 2621 request = pma.pm_state; 2622 2623 if (request != PMC_STATE_DISABLED && 2624 request != PMC_STATE_FREE) { 2625 error = EINVAL; 2626 break; 2627 } 2628 2629 ri = pma.pm_pmc; /* pmc id == row index */ 2630 if (ri < 0 || ri >= (int) md->pmd_npmc) { 2631 error = EINVAL; 2632 break; 2633 } 2634 2635 /* 2636 * We can't disable a PMC with a row-index allocated 2637 * for process virtual PMCs. 2638 */ 2639 2640 if (PMC_ROW_DISP_IS_THREAD(ri) && 2641 request == PMC_STATE_DISABLED) { 2642 error = EBUSY; 2643 break; 2644 } 2645 2646 /* 2647 * otherwise, this PMC on this CPU is either free or 2648 * in system-wide mode. 2649 */ 2650 2651 pmc_save_cpu_binding(&pb); 2652 pmc_select_cpu(cpu); 2653 2654 pc = pmc_pcpu[cpu]; 2655 phw = pc->pc_hwpmcs[ri]; 2656 2657 /* 2658 * XXX do we need some kind of 'forced' disable? 
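 *
 * The transitions handled below apply only when no PMC is currently
 * configured on the hardware; as a summary:
 *
 *	request		current phw_state	action
 *	DISABLED	ENABLED			disable, mark row standalone
 *	FREE		not ENABLED		enable, unmark row standalone
 *
 * Everything else is a no-op, or EBUSY if the hardware is in use.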
2659 */ 2660 2661 if (phw->phw_pmc == NULL) { 2662 if (request == PMC_STATE_DISABLED && 2663 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED)) { 2664 phw->phw_state &= ~PMC_PHW_FLAG_IS_ENABLED; 2665 PMC_MARK_ROW_STANDALONE(ri); 2666 } else if (request == PMC_STATE_FREE && 2667 (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0) { 2668 phw->phw_state |= PMC_PHW_FLAG_IS_ENABLED; 2669 PMC_UNMARK_ROW_STANDALONE(ri); 2670 } 2671 /* other cases are a no-op */ 2672 } else 2673 error = EBUSY; 2674 2675 pmc_restore_cpu_binding(&pb); 2676 } 2677 break; 2678 2679 2680 /* 2681 * Allocate a PMC. 2682 */ 2683 2684 case PMC_OP_PMCALLOCATE: 2685 { 2686 uint32_t caps; 2687 u_int cpu; 2688 int n; 2689 enum pmc_mode mode; 2690 struct pmc *pmc; 2691 struct pmc_hw *phw; 2692 struct pmc_op_pmcallocate pa; 2693 struct pmc_binding pb; 2694 2695 if ((error = copyin(arg, &pa, sizeof(pa))) != 0) 2696 break; 2697 2698 caps = pa.pm_caps; 2699 mode = pa.pm_mode; 2700 cpu = pa.pm_cpu; 2701 2702 if ((mode != PMC_MODE_SS && mode != PMC_MODE_SC && 2703 mode != PMC_MODE_TS && mode != PMC_MODE_TC) || 2704 (cpu != (u_int) PMC_CPU_ANY && cpu >= (u_int) mp_ncpus)) { 2705 error = EINVAL; 2706 break; 2707 } 2708 2709 /* 2710 * Virtual PMCs should only ask for a default CPU. 2711 * System mode PMCs need to specify a non-default CPU. 2712 */ 2713 2714 if ((PMC_IS_VIRTUAL_MODE(mode) && cpu != (u_int) PMC_CPU_ANY) || 2715 (PMC_IS_SYSTEM_MODE(mode) && cpu == (u_int) PMC_CPU_ANY)) { 2716 error = EINVAL; 2717 break; 2718 } 2719 2720 /* 2721 * Check that a disabled CPU is not being asked for. 2722 */ 2723 2724 if (PMC_IS_SYSTEM_MODE(mode) && pmc_cpu_is_disabled(cpu)) { 2725 error = ENXIO; 2726 break; 2727 } 2728 2729 /* 2730 * Refuse an allocation for a system-wide PMC if this 2731 * process has been jailed, or if this process lacks 2732 * super-user credentials and the sysctl tunable 2733 * 'security.bsd.unprivileged_syspmcs' is zero. 2734 */ 2735 2736 if (PMC_IS_SYSTEM_MODE(mode)) { 2737 if (jailed(curthread->td_ucred)) 2738 error = EPERM; 2739 else if (suser(curthread) && 2740 (pmc_unprivileged_syspmcs == 0)) 2741 error = EPERM; 2742 } 2743 2744 if (error) 2745 break; 2746 2747 /* 2748 * Look for valid values for 'pm_flags' 2749 */ 2750 2751 if ((pa.pm_flags & ~(PMC_F_DESCENDANTS|PMC_F_LOG_TC_CSW)) 2752 != 0) { 2753 error = EINVAL; 2754 break; 2755 } 2756 2757 /* 2758 * All sampling mode PMCs need to be able to interrupt the 2759 * CPU. 
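 *
 * (A sampling-mode consumer would eventually drive this path with a
 * hypothetical libpmc-level sequence such as:
 *
 *	pmc_allocate(ctrspec, PMC_MODE_SS, 0, cpu, &id);
 *	pmc_set(id, reloadcount);	interrupt every 'reloadcount' events
 *	pmc_start(id);
 *
 * which is why PMC_CAP_INTERRUPT is forced on below.)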
2760 */ 2761 2762 if (PMC_IS_SAMPLING_MODE(mode)) { 2763 caps |= PMC_CAP_INTERRUPT; 2764 error = ENOSYS; /* for snapshot 6 */ 2765 break; 2766 } 2767 2768 PMCDBG(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d", 2769 pa.pm_ev, caps, mode, cpu); 2770 2771 pmc = pmc_allocate_pmc_descriptor(); 2772 pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class, 2773 PMC_ID_INVALID); 2774 pmc->pm_event = pa.pm_ev; 2775 pmc->pm_state = PMC_STATE_FREE; 2776 pmc->pm_caps = caps; 2777 pmc->pm_flags = pa.pm_flags; 2778 2779 /* switch thread to CPU 'cpu' */ 2780 pmc_save_cpu_binding(&pb); 2781 2782 #define PMC_IS_SHAREABLE_PMC(cpu, n) \ 2783 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_state & \ 2784 PMC_PHW_FLAG_IS_SHAREABLE) 2785 #define PMC_IS_UNALLOCATED(cpu, n) \ 2786 (pmc_pcpu[(cpu)]->pc_hwpmcs[(n)]->phw_pmc == NULL) 2787 2788 if (PMC_IS_SYSTEM_MODE(mode)) { 2789 pmc_select_cpu(cpu); 2790 for (n = 0; n < (int) md->pmd_npmc; n++) 2791 if (pmc_can_allocate_row(n, mode) == 0 && 2792 pmc_can_allocate_rowindex( 2793 curthread->td_proc, n, cpu) == 0 && 2794 (PMC_IS_UNALLOCATED(cpu, n) || 2795 PMC_IS_SHAREABLE_PMC(cpu, n)) && 2796 md->pmd_allocate_pmc(cpu, n, pmc, 2797 &pa) == 0) 2798 break; 2799 } else { 2800 /* Process virtual mode */ 2801 for (n = 0; n < (int) md->pmd_npmc; n++) { 2802 if (pmc_can_allocate_row(n, mode) == 0 && 2803 pmc_can_allocate_rowindex( 2804 curthread->td_proc, n, 2805 PMC_CPU_ANY) == 0 && 2806 md->pmd_allocate_pmc(curthread->td_oncpu, 2807 n, pmc, &pa) == 0) 2808 break; 2809 } 2810 } 2811 2812 #undef PMC_IS_UNALLOCATED 2813 #undef PMC_IS_SHAREABLE_PMC 2814 2815 pmc_restore_cpu_binding(&pb); 2816 2817 if (n == (int) md->pmd_npmc) { 2818 pmc_destroy_pmc_descriptor(pmc); 2819 FREE(pmc, M_PMC); 2820 pmc = NULL; 2821 error = EINVAL; 2822 break; 2823 } 2824 2825 /* Fill in the correct value in the ID field */ 2826 pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n); 2827 2828 PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x", 2829 pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id); 2830 2831 /* 2832 * Configure global PMCs immediately 2833 */ 2834 2835 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) { 2836 2837 pmc_save_cpu_binding(&pb); 2838 pmc_select_cpu(cpu); 2839 2840 phw = pmc_pcpu[cpu]->pc_hwpmcs[n]; 2841 2842 if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 || 2843 (error = md->pmd_config_pmc(cpu, n, pmc)) != 0) { 2844 (void) md->pmd_release_pmc(cpu, n, pmc); 2845 pmc_destroy_pmc_descriptor(pmc); 2846 FREE(pmc, M_PMC); 2847 pmc = NULL; 2848 pmc_restore_cpu_binding(&pb); 2849 error = EPERM; 2850 break; 2851 } 2852 2853 pmc_restore_cpu_binding(&pb); 2854 } 2855 2856 pmc->pm_state = PMC_STATE_ALLOCATED; 2857 2858 /* 2859 * mark row disposition 2860 */ 2861 2862 if (PMC_IS_SYSTEM_MODE(mode)) 2863 PMC_MARK_ROW_STANDALONE(n); 2864 else 2865 PMC_MARK_ROW_THREAD(n); 2866 2867 /* 2868 * Register this PMC with the current thread as its owner. 2869 */ 2870 2871 if ((error = 2872 pmc_register_owner(curthread->td_proc, pmc)) != 0) { 2873 pmc_release_pmc_descriptor(pmc); 2874 FREE(pmc, M_PMC); 2875 pmc = NULL; 2876 break; 2877 } 2878 2879 /* 2880 * Return the allocated index. 2881 */ 2882 2883 pa.pm_pmcid = pmc->pm_id; 2884 2885 error = copyout(&pa, arg, sizeof(pa)); 2886 } 2887 break; 2888 2889 2890 /* 2891 * Attach a PMC to a process.
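 *
 * A process-virtual PMC is attached either implicitly to its owner at
 * PMCSTART time (see pmc_start() above), or explicitly, e.g. via the
 * (hypothetical) libpmc wrapper:
 *
 *	pmc_attach(id, pid);	pid 0 is taken to mean 'self' below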
2892 */ 2893 2894 case PMC_OP_PMCATTACH: 2895 { 2896 struct pmc *pm; 2897 struct proc *p; 2898 struct pmc_op_pmcattach a; 2899 2900 sx_assert(&pmc_sx, SX_XLOCKED); 2901 2902 if ((error = copyin(arg, &a, sizeof(a))) != 0) 2903 break; 2904 2905 if (a.pm_pid < 0) { 2906 error = EINVAL; 2907 break; 2908 } else if (a.pm_pid == 0) 2909 a.pm_pid = td->td_proc->p_pid; 2910 2911 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 2912 break; 2913 2914 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 2915 error = EINVAL; 2916 break; 2917 } 2918 2919 /* PMCs may be (re)attached only when allocated or stopped */ 2920 if (pm->pm_state == PMC_STATE_RUNNING) { 2921 error = EBUSY; 2922 break; 2923 } else if (pm->pm_state != PMC_STATE_ALLOCATED && 2924 pm->pm_state != PMC_STATE_STOPPED) { 2925 error = EINVAL; 2926 break; 2927 } 2928 2929 /* lookup pid */ 2930 if ((p = pfind(a.pm_pid)) == NULL) { 2931 error = ESRCH; 2932 break; 2933 } 2934 2935 /* 2936 * Ignore processes that are working on exiting. 2937 */ 2938 if (p->p_flag & P_WEXIT) { 2939 error = ESRCH; 2940 PROC_UNLOCK(p); /* pfind() returns a locked process */ 2941 break; 2942 } 2943 2944 /* 2945 * we are allowed to attach a PMC to a process if 2946 * we can debug it. 2947 */ 2948 error = p_candebug(curthread, p); 2949 2950 PROC_UNLOCK(p); 2951 2952 if (error == 0) 2953 error = pmc_attach_process(p, pm); 2954 } 2955 break; 2956 2957 2958 /* 2959 * Detach an attached PMC from a process. 2960 */ 2961 2962 case PMC_OP_PMCDETACH: 2963 { 2964 struct pmc *pm; 2965 struct proc *p; 2966 struct pmc_op_pmcattach a; 2967 2968 if ((error = copyin(arg, &a, sizeof(a))) != 0) 2969 break; 2970 2971 if (a.pm_pid < 0) { 2972 error = EINVAL; 2973 break; 2974 } else if (a.pm_pid == 0) 2975 a.pm_pid = td->td_proc->p_pid; 2976 2977 if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0) 2978 break; 2979 2980 if ((p = pfind(a.pm_pid)) == NULL) { 2981 error = ESRCH; 2982 break; 2983 } 2984 2985 /* 2986 * Treat processes that are in the process of exiting 2987 * as if they were not present. 2988 */ 2989 2990 if (p->p_flag & P_WEXIT) 2991 error = ESRCH; 2992 2993 PROC_UNLOCK(p); /* pfind() returns a locked process */ 2994 2995 if (error == 0) 2996 error = pmc_detach_process(p, pm); 2997 } 2998 break; 2999 3000 3001 /* 3002 * Release an allocated PMC 3003 */ 3004 3005 case PMC_OP_PMCRELEASE: 3006 { 3007 pmc_id_t pmcid; 3008 struct pmc *pm; 3009 struct pmc_owner *po; 3010 struct pmc_op_simple sp; 3011 3012 /* 3013 * Find PMC pointer for the named PMC. 3014 * 3015 * Use pmc_release_pmc_descriptor() to switch off the 3016 * PMC, remove all its target threads, and remove the 3017 * PMC from its owner's list. 3018 * 3019 * Remove the owner record if this is the last PMC 3020 * owned. 3021 * 3022 * Free up space. 3023 */ 3024 3025 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3026 break; 3027 3028 pmcid = sp.pm_pmcid; 3029 3030 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3031 break; 3032 3033 po = pm->pm_owner; 3034 pmc_release_pmc_descriptor(pm); 3035 pmc_maybe_remove_owner(po); 3036 3037 FREE(pm, M_PMC); 3038 } 3039 break; 3040 3041 3042 /* 3043 * Read and/or write a PMC. 
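 *
 * A single request can both read the old value and write a new one; a
 * caller-side sketch (names illustrative):
 *
 *	struct pmc_op_pmcrw prw;
 *	prw.pm_pmcid = id;
 *	prw.pm_flags = PMC_F_OLDVALUE | PMC_F_NEWVALUE;
 *	prw.pm_value = 0;	new value in; old value is copied back out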
3044 */ 3045 3046 case PMC_OP_PMCRW: 3047 { 3048 uint32_t cpu, ri; 3049 struct pmc *pm; 3050 struct pmc_op_pmcrw *pprw; 3051 struct pmc_op_pmcrw prw; 3052 struct pmc_binding pb; 3053 pmc_value_t oldvalue; 3054 3055 PMC_DOWNGRADE_SX(); 3056 3057 if ((error = copyin(arg, &prw, sizeof(prw))) != 0) 3058 break; 3059 3060 ri = 0; 3061 PMCDBG(PMC,OPS,1, "rw id=%d flags=0x%x", prw.pm_pmcid, 3062 prw.pm_flags); 3063 3064 /* must have at least one flag set */ 3065 if ((prw.pm_flags & (PMC_F_OLDVALUE|PMC_F_NEWVALUE)) == 0) { 3066 error = EINVAL; 3067 break; 3068 } 3069 3070 /* locate pmc descriptor */ 3071 if ((error = pmc_find_pmc(prw.pm_pmcid, &pm)) != 0) 3072 break; 3073 3074 /* Only a PMC in the allocated, stopped or running state can be read or written. */ 3075 if (pm->pm_state != PMC_STATE_ALLOCATED && 3076 pm->pm_state != PMC_STATE_STOPPED && 3077 pm->pm_state != PMC_STATE_RUNNING) { 3078 error = EINVAL; 3079 break; 3080 } 3081 3082 /* writing a new value is not permitted while the pmc is running */ 3083 if (pm->pm_state == PMC_STATE_RUNNING && 3084 (prw.pm_flags & PMC_F_NEWVALUE)) { 3085 error = EBUSY; 3086 break; 3087 } 3088 3089 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { 3090 3091 /* 3092 * If this PMC is attached to its owner (i.e., 3093 * the process requesting this operation) and 3094 * is running, then attempt to get an 3095 * up-to-date reading from hardware for a READ. 3096 * Writes are not allowed while the PMC is 3097 * running, so only update the saved value 3098 * field. 3099 * 3100 * If the PMC is not running, or is not 3101 * attached to its owner, read/write to the 3102 * savedvalue field. 3103 */ 3104 3105 ri = PMC_TO_ROWINDEX(pm); 3106 3107 mtx_pool_lock_spin(pmc_mtxpool, pm); 3108 cpu = curthread->td_oncpu; 3109 3110 if (prw.pm_flags & PMC_F_OLDVALUE) { 3111 if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) && 3112 (pm->pm_state == PMC_STATE_RUNNING)) 3113 error = (*md->pmd_read_pmc)(cpu, ri, 3114 &oldvalue); 3115 else 3116 oldvalue = pm->pm_gv.pm_savedvalue; 3117 } 3118 if (prw.pm_flags & PMC_F_NEWVALUE) 3119 pm->pm_gv.pm_savedvalue = prw.pm_value; 3120 3121 mtx_pool_unlock_spin(pmc_mtxpool, pm); 3122 3123 } else { /* System mode PMCs */ 3124 cpu = PMC_TO_CPU(pm); 3125 ri = PMC_TO_ROWINDEX(pm); 3126 3127 if (pmc_cpu_is_disabled(cpu)) { 3128 error = ENXIO; 3129 break; 3130 } 3131 3132 /* move this thread to CPU 'cpu' */ 3133 pmc_save_cpu_binding(&pb); 3134 pmc_select_cpu(cpu); 3135 3136 critical_enter(); 3137 /* save old value */ 3138 if (prw.pm_flags & PMC_F_OLDVALUE) 3139 if ((error = (*md->pmd_read_pmc)(cpu, ri, 3140 &oldvalue))) 3141 goto error; 3142 /* write out new value */ 3143 if (prw.pm_flags & PMC_F_NEWVALUE) 3144 error = (*md->pmd_write_pmc)(cpu, ri, 3145 prw.pm_value); 3146 error: 3147 critical_exit(); 3148 pmc_restore_cpu_binding(&pb); 3149 if (error) 3150 break; 3151 } 3152 3153 pprw = (struct pmc_op_pmcrw *) arg; 3154 3155 #if DEBUG 3156 if (prw.pm_flags & PMC_F_NEWVALUE) 3157 PMCDBG(PMC,OPS,2, "rw ri=%d new %jx -> old %jx", 3158 ri, prw.pm_value, oldvalue); 3159 else 3160 PMCDBG(PMC,OPS,2, "rw ri=%d -> old %jx", ri, oldvalue); 3161 #endif 3162 3163 /* return old value if requested */ 3164 if (prw.pm_flags & PMC_F_OLDVALUE) 3165 if ((error = copyout(&oldvalue, &pprw->pm_value, 3166 sizeof(prw.pm_value)))) 3167 break; 3168 3169 /* 3170 * send a signal (SIGIO) to the owner if it is trying to read 3171 * a PMC with no target processes attached.
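 *
 * An owner that wants to notice this would install a handler before
 * reading, e.g. (user-level sketch, handler name illustrative):
 *
 *	signal(SIGIO, pmc_targets_gone_handler);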
3172 */ 3173 3174 if (LIST_EMPTY(&pm->pm_targets) && 3175 (prw.pm_flags & PMC_F_OLDVALUE)) { 3176 PROC_LOCK(curthread->td_proc); 3177 psignal(curthread->td_proc, SIGIO); 3178 PROC_UNLOCK(curthread->td_proc); 3179 } 3180 } 3181 break; 3182 3183 3184 /* 3185 * Set the sampling rate for a sampling mode PMC and the 3186 * initial count for a counting mode PMC. 3187 */ 3188 3189 case PMC_OP_PMCSETCOUNT: 3190 { 3191 struct pmc *pm; 3192 struct pmc_op_pmcsetcount sc; 3193 3194 PMC_DOWNGRADE_SX(); 3195 3196 if ((error = copyin(arg, &sc, sizeof(sc))) != 0) 3197 break; 3198 3199 if ((error = pmc_find_pmc(sc.pm_pmcid, &pm)) != 0) 3200 break; 3201 3202 if (pm->pm_state == PMC_STATE_RUNNING) { 3203 error = EBUSY; 3204 break; 3205 } 3206 3207 if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 3208 pm->pm_sc.pm_reloadcount = sc.pm_count; 3209 else 3210 pm->pm_sc.pm_initial = sc.pm_count; 3211 } 3212 break; 3213 3214 3215 /* 3216 * Start a PMC. 3217 */ 3218 3219 case PMC_OP_PMCSTART: 3220 { 3221 pmc_id_t pmcid; 3222 struct pmc *pm; 3223 struct pmc_op_simple sp; 3224 3225 sx_assert(&pmc_sx, SX_XLOCKED); 3226 3227 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3228 break; 3229 3230 pmcid = sp.pm_pmcid; 3231 3232 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3233 break; 3234 3235 KASSERT(pmcid == pm->pm_id, 3236 ("[pmc,%d] pmcid %x != id %x", __LINE__, 3237 pm->pm_id, pmcid)); 3238 3239 if (pm->pm_state == PMC_STATE_RUNNING) /* already running */ 3240 break; 3241 else if (pm->pm_state != PMC_STATE_STOPPED && 3242 pm->pm_state != PMC_STATE_ALLOCATED) { 3243 error = EINVAL; 3244 break; 3245 } 3246 3247 error = pmc_start(pm); 3248 } 3249 break; 3250 3251 3252 /* 3253 * Stop a PMC. 3254 */ 3255 3256 case PMC_OP_PMCSTOP: 3257 { 3258 pmc_id_t pmcid; 3259 struct pmc *pm; 3260 struct pmc_op_simple sp; 3261 3262 PMC_DOWNGRADE_SX(); 3263 3264 if ((error = copyin(arg, &sp, sizeof(sp))) != 0) 3265 break; 3266 3267 pmcid = sp.pm_pmcid; 3268 3269 /* 3270 * Mark the PMC as inactive and invoke the MD stop 3271 * routines if needed. 3272 */ 3273 3274 if ((error = pmc_find_pmc(pmcid, &pm)) != 0) 3275 break; 3276 3277 KASSERT(pmcid == pm->pm_id, 3278 ("[pmc,%d] pmc id %x != pmcid %x", __LINE__, 3279 pm->pm_id, pmcid)); 3280 3281 if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */ 3282 break; 3283 else if (pm->pm_state != PMC_STATE_RUNNING) { 3284 error = EINVAL; 3285 break; 3286 } 3287 3288 error = pmc_stop(pm); 3289 } 3290 break; 3291 3292 3293 /* 3294 * Write a user-entry to the log file. 3295 */ 3296 3297 case PMC_OP_WRITELOG: 3298 { 3299 3300 PMC_DOWNGRADE_SX(); 3301 3302 /* 3303 * flush all per-cpu hash tables 3304 * append user-log entry 3305 */ 3306 3307 error = ENOSYS; 3308 } 3309 break; 3310 3311 3312 #if __i386__ || __amd64__ 3313 3314 /* 3315 * Machine dependent operation for i386-class processors. 3316 * 3317 * Retrieve the MSR number associated with the counter 3318 * 'pmc_id'. This allows processes to directly use RDPMC 3319 * instructions to read their PMCs, without the overhead of a 3320 * system call. 
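 *
 * A (hypothetical) user-level read, assuming the returned 'pm_msr'
 * value is usable as the RDPMC counter index on this CPU:
 *
 *	uint32_t lo, hi;
 *	__asm __volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (gm.pm_msr));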
3321 */ 3322 3323 case PMC_OP_PMCX86GETMSR: 3324 { 3325 int ri; 3326 struct pmc *pm; 3327 struct pmc_target *pt; 3328 struct pmc_op_x86_getmsr gm; 3329 3330 PMC_DOWNGRADE_SX(); 3331 3332 /* CPU has no 'GETMSR' support */ 3333 if (md->pmd_get_msr == NULL) { 3334 error = ENOSYS; 3335 break; 3336 } 3337 3338 if ((error = copyin(arg, &gm, sizeof(gm))) != 0) 3339 break; 3340 3341 if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0) 3342 break; 3343 3344 /* 3345 * The allocated PMC has to be a process virtual PMC, 3346 * i.e., of type MODE_T[CS]. Global PMCs can only be 3347 * read using the PMCREAD operation since they may be 3348 * allocated on a different CPU than the one we could 3349 * be running on at the time of the RDPMC instruction. 3350 * 3351 * The GETMSR operation is not allowed for PMCs that 3352 * are inherited across processes. 3353 */ 3354 3355 if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) || 3356 (pm->pm_flags & PMC_F_DESCENDANTS)) { 3357 error = EINVAL; 3358 break; 3359 } 3360 3361 /* 3362 * It only makes sense to use a RDPMC (or its 3363 * equivalent instruction on non-x86 architectures) on 3364 * a process that has allocated and attached a PMC to 3365 * itself. Conversely, the PMC is only allowed to have 3366 * one process attached to it -- its owner. 3367 */ 3368 3369 if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL || 3370 LIST_NEXT(pt, pt_next) != NULL || 3371 pt->pt_process->pp_proc != pm->pm_owner->po_owner) { 3372 error = EINVAL; 3373 break; 3374 } 3375 3376 ri = PMC_TO_ROWINDEX(pm); 3377 3378 if ((error = (*md->pmd_get_msr)(ri, &gm.pm_msr)) != 0) 3379 break; 3380 3381 if ((error = copyout(&gm, arg, sizeof(gm))) != 0) 3382 break; 3383 3384 /* 3385 * Mark our process as using MSRs. Update machine 3386 * state using a forced context switch. 3387 */ 3388 3389 pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS; 3390 pmc_force_context_switch(); 3391 3392 } 3393 break; 3394 #endif 3395 3396 default: 3397 error = EINVAL; 3398 break; 3399 } 3400 3401 if (is_sx_downgraded) 3402 sx_sunlock(&pmc_sx); 3403 else 3404 sx_xunlock(&pmc_sx); 3405 3406 if (error) 3407 atomic_add_int(&pmc_stats.pm_syscall_errors, 1); 3408 3409 return error; 3410 } 3411 3412 /* 3413 * Helper functions 3414 */ 3415 3416 /* 3417 * Configure a log file. 3418 */ 3419 3420 static int 3421 pmc_configure_log(struct pmc_owner *po, int logfd) 3422 { 3423 struct proc *p; 3424 3425 return ENOSYS; /* for now */ 3426 3427 p = po->po_owner; 3428 3429 if (po->po_logfd < 0 && logfd < 0) /* nothing to do */ 3430 return 0; 3431 3432 if (po->po_logfd >= 0 && logfd < 0) { 3433 /* deconfigure log */ 3434 /* XXX */ 3435 po->po_flags &= ~PMC_PO_OWNS_LOGFILE; 3436 pmc_maybe_remove_owner(po); 3437 3438 } else if (po->po_logfd < 0 && logfd >= 0) { 3439 /* configure log file */ 3440 /* XXX */ 3441 po->po_flags |= PMC_PO_OWNS_LOGFILE; 3442 3443 /* mark process as using HWPMCs */ 3444 PROC_LOCK(p); 3445 p->p_flag |= P_HWPMC; 3446 PROC_UNLOCK(p); 3447 } else 3448 return EBUSY; 3449 3450 return 0; 3451 } 3452 3453 /* 3454 * Log an exit event to the PMC owner's log file. 3455 */ 3456 3457 static void 3458 pmc_log_process_exit(struct pmc *pm, struct pmc_process *pp) 3459 { 3460 KASSERT(pm->pm_flags & PMC_F_LOG_TC_PROCEXIT, 3461 ("[pmc,%d] log-process-exit called gratuitously", __LINE__)); 3462 3463 (void) pm; 3464 (void) pp; 3465 3466 return; 3467 } 3468 3469 /* 3470 * Event handlers. 3471 */ 3472 3473 /* 3474 * Handle a process exit. 3475 * 3476 * XXX This eventhandler gets called early in the exit process.
3477 * Consider using a 'hook' invocation from thread_exit() or equivalent 3478 * spot. Another negative is that kse_exit doesn't seem to call 3479 * exit1() [??]. 3480 */ 3481 3482 static void 3483 pmc_process_exit(void *arg __unused, struct proc *p) 3484 { 3485 int is_using_hwpmcs; 3486 3487 PROC_LOCK(p); 3488 is_using_hwpmcs = p->p_flag & P_HWPMC; 3489 PROC_UNLOCK(p); 3490 3491 if (is_using_hwpmcs) { 3492 PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid, 3493 p->p_comm); 3494 3495 PMC_GET_SX_XLOCK(); 3496 (void) pmc_hook_handler(curthread, PMC_FN_PROCESS_EXIT, 3497 (void *) p); 3498 sx_xunlock(&pmc_sx); 3499 } 3500 } 3501 3502 /* 3503 * Handle a process fork. 3504 * 3505 * If the parent process 'p1' is under HWPMC monitoring, then copy 3506 * over any attached PMCs that have 'do_descendants' semantics. 3507 */ 3508 3509 static void 3510 pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *p2, 3511 int flags) 3512 { 3513 int is_using_hwpmcs; 3514 3515 (void) flags; /* unused parameter */ 3516 3517 PROC_LOCK(p1); 3518 is_using_hwpmcs = p1->p_flag & P_HWPMC; 3519 PROC_UNLOCK(p1); 3520 3521 if (is_using_hwpmcs) { 3522 PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s)", p1, 3523 p1->p_pid, p1->p_comm); 3524 PMC_GET_SX_XLOCK(); 3525 (void) pmc_hook_handler(curthread, PMC_FN_PROCESS_FORK, 3526 (void *) p2); 3527 sx_xunlock(&pmc_sx); 3528 } 3529 } 3530 3531 3532 /* 3533 * initialization 3534 */ 3535 3536 static const char *pmc_name_of_pmcclass[] = { 3537 #undef __PMC_CLASS 3538 #define __PMC_CLASS(N) #N , 3539 __PMC_CLASSES() 3540 }; 3541 3542 static int 3543 pmc_initialize(void) 3544 { 3545 int error, cpu, n; 3546 struct pmc_binding pb; 3547 3548 md = NULL; 3549 error = 0; 3550 3551 #if DEBUG 3552 /* parse debug flags first */ 3553 if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags", 3554 pmc_debugstr, sizeof(pmc_debugstr))) 3555 pmc_debugflags_parse(pmc_debugstr, 3556 pmc_debugstr+strlen(pmc_debugstr)); 3557 #endif 3558 3559 PMCDBG(MOD,INI,0, "PMC Initialize (version %x)", PMC_VERSION); 3560 3561 /* 3562 * check sysctl parameters 3563 */ 3564 3565 if (pmc_hashsize <= 0) { 3566 (void) printf("pmc: sysctl variable \"" 3567 PMC_SYSCTL_NAME_PREFIX "hashsize\" must be greater than " 3568 "zero\n"); 3569 pmc_hashsize = PMC_HASH_SIZE; 3570 } 3571 3572 #if defined(__i386__) 3573 /* determine the CPU kind. This is i386 specific */ 3574 if (strcmp(cpu_vendor, "AuthenticAMD") == 0) 3575 md = pmc_amd_initialize(); 3576 else if (strcmp(cpu_vendor, "GenuineIntel") == 0) 3577 md = pmc_intel_initialize(); 3578 /* XXX: what about the other i386 CPU manufacturers? 
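 * A port would be dispatched here, e.g. (hypothetical; no such
 * function exists yet):
 *
 *	else if (strcmp(cpu_vendor, "CyrixInstead") == 0)
 *		md = pmc_cyrix_initialize();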
*/ 3579 #elif defined(__amd64__) 3580 if (strcmp(cpu_vendor, "AuthenticAMD") == 0) 3581 md = pmc_amd_initialize(); 3582 #else /* other architectures */ 3583 md = NULL; 3584 #endif 3585 3586 if (md == NULL || md->pmd_init == NULL) 3587 return ENOSYS; 3588 3589 /* allocate space for the per-cpu array */ 3590 MALLOC(pmc_pcpu, struct pmc_cpu **, mp_ncpus * sizeof(struct pmc_cpu *), 3591 M_PMC, M_WAITOK|M_ZERO); 3592 3593 /* per-cpu 'saved values' for managing process-mode PMCs */ 3594 MALLOC(pmc_pcpu_saved, pmc_value_t *, 3595 sizeof(pmc_value_t) * mp_ncpus * md->pmd_npmc, M_PMC, M_WAITOK); 3596 3597 /* perform cpu dependent initialization */ 3598 pmc_save_cpu_binding(&pb); 3599 for (cpu = 0; cpu < mp_ncpus; cpu++) { 3600 if (pmc_cpu_is_disabled(cpu)) 3601 continue; 3602 pmc_select_cpu(cpu); 3603 if ((error = md->pmd_init(cpu)) != 0) 3604 break; 3605 } 3606 pmc_restore_cpu_binding(&pb); 3607 3608 if (error != 0) 3609 return error; 3610 3611 /* allocate space for the row disposition array */ 3612 pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc, 3613 M_PMC, M_WAITOK|M_ZERO); 3614 3615 KASSERT(pmc_pmcdisp != NULL, 3616 ("[pmc,%d] pmcdisp allocation returned NULL", __LINE__)); 3617 3618 /* mark all PMCs as available */ 3619 for (n = 0; n < (int) md->pmd_npmc; n++) 3620 PMC_MARK_ROW_FREE(n); 3621 3622 /* allocate owner and process hash tables */ 3623 pmc_ownerhash = hashinit(pmc_hashsize, M_PMC, 3624 &pmc_ownerhashmask); 3625 3626 pmc_processhash = hashinit(pmc_hashsize, M_PMC, 3627 &pmc_processhashmask); 3628 mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc", MTX_SPIN); 3629 3630 /* allocate a pool of spin mutexes */ 3631 pmc_mtxpool = mtx_pool_create("pmc", pmc_mtxpool_size, MTX_SPIN); 3632 3633 PMCDBG(MOD,INI,1, "pmc_ownerhash=%p, mask=0x%lx " 3634 "targethash=%p mask=0x%lx", pmc_ownerhash, pmc_ownerhashmask, 3635 pmc_processhash, pmc_processhashmask); 3636 3637 /* register process {exit,fork} handlers */ 3638 pmc_exit_tag = EVENTHANDLER_REGISTER(process_exit, 3639 pmc_process_exit, NULL, EVENTHANDLER_PRI_ANY); 3640 pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork, 3641 pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY); 3642 3643 /* set hook functions */ 3644 pmc_intr = md->pmd_intr; 3645 pmc_hook = pmc_hook_handler; 3646 3647 if (error == 0) { 3648 printf(PMC_MODULE_NAME ":"); 3649 for (n = 0; n < (int) md->pmd_nclass; n++) 3650 printf(" %s(%d)", 3651 pmc_name_of_pmcclass[md->pmd_classes[n].pm_class], 3652 md->pmd_nclasspmcs[n]); 3653 printf("\n"); 3654 } 3655 3656 return error; 3657 } 3658 3659 /* prepare to be unloaded */ 3660 static void 3661 pmc_cleanup(void) 3662 { 3663 int cpu; 3664 struct pmc_ownerhash *ph; 3665 struct pmc_owner *po, *tmp; 3666 struct pmc_binding pb; 3667 #if DEBUG 3668 struct pmc_processhash *prh; 3669 #endif 3670 3671 PMCDBG(MOD,INI,0, "%s", "cleanup"); 3672 3673 pmc_intr = NULL; /* no more interrupts please */ 3674 3675 sx_xlock(&pmc_sx); 3676 if (pmc_hook == NULL) { /* being unloaded already */ 3677 sx_xunlock(&pmc_sx); 3678 return; 3679 } 3680 3681 pmc_hook = NULL; /* prevent new threads from entering module */ 3682 3683 /* deregister event handlers */ 3684 EVENTHANDLER_DEREGISTER(process_fork, pmc_fork_tag); 3685 EVENTHANDLER_DEREGISTER(process_exit, pmc_exit_tag); 3686 3687 /* send SIGBUS to all owner processes, free up allocations */ 3688 if (pmc_ownerhash) 3689 for (ph = pmc_ownerhash; 3690 ph <= &pmc_ownerhash[pmc_ownerhashmask]; 3691 ph++) { 3692 LIST_FOREACH_SAFE(po, ph, po_next, tmp) { 3693 pmc_remove_owner(po); 3694 3695 /* send SIGBUS to owner
processes */ 3696 PMCDBG(MOD,INI,2, "cleanup signal proc=%p " 3697 "(%d, %s)", po->po_owner, 3698 po->po_owner->p_pid, 3699 po->po_owner->p_comm); 3700 3701 PROC_LOCK(po->po_owner); 3702 psignal(po->po_owner, SIGBUS); 3703 PROC_UNLOCK(po->po_owner); 3704 FREE(po, M_PMC); 3705 } 3706 } 3707 3708 /* reclaim allocated data structures */ 3709 if (pmc_mtxpool) 3710 mtx_pool_destroy(&pmc_mtxpool); 3711 3712 mtx_destroy(&pmc_processhash_mtx); 3713 if (pmc_processhash) { 3714 #if DEBUG 3715 struct pmc_process *pp; 3716 3717 PMCDBG(MOD,INI,3, "%s", "destroy process hash"); 3718 for (prh = pmc_processhash; 3719 prh <= &pmc_processhash[pmc_processhashmask]; 3720 prh++) 3721 LIST_FOREACH(pp, prh, pp_next) 3722 PMCDBG(MOD,INI,3, "pid=%d", pp->pp_proc->p_pid); 3723 #endif 3724 3725 hashdestroy(pmc_processhash, M_PMC, pmc_processhashmask); 3726 pmc_processhash = NULL; 3727 } 3728 3729 if (pmc_ownerhash) { 3730 PMCDBG(MOD,INI,3, "%s", "destroy owner hash"); 3731 hashdestroy(pmc_ownerhash, M_PMC, pmc_ownerhashmask); 3732 pmc_ownerhash = NULL; 3733 } 3734 3735 /* do processor dependent cleanup */ 3736 PMCDBG(MOD,INI,3, "%s", "md cleanup"); 3737 if (md) { 3738 pmc_save_cpu_binding(&pb); 3739 for (cpu = 0; cpu < mp_ncpus; cpu++) { 3740 PMCDBG(MOD,INI,1,"pmc-cleanup cpu=%d pcs=%p", 3741 cpu, pmc_pcpu[cpu]); 3742 if (pmc_cpu_is_disabled(cpu)) 3743 continue; 3744 pmc_select_cpu(cpu); 3745 if (pmc_pcpu[cpu]) 3746 (void) md->pmd_cleanup(cpu); 3747 } 3748 FREE(md, M_PMC); 3749 md = NULL; 3750 pmc_restore_cpu_binding(&pb); 3751 } 3752 3753 /* deallocate per-cpu structures */ 3754 FREE(pmc_pcpu, M_PMC); 3755 pmc_pcpu = NULL; 3756 3757 FREE(pmc_pcpu_saved, M_PMC); 3758 pmc_pcpu_saved = NULL; 3759 3760 if (pmc_pmcdisp) { 3761 FREE(pmc_pmcdisp, M_PMC); 3762 pmc_pmcdisp = NULL; 3763 } 3764 3765 sx_xunlock(&pmc_sx); /* we are done */ 3766 } 3767 3768 /* 3769 * The function called at load/unload. 3770 */ 3771 3772 static int 3773 load (struct module *module __unused, int cmd, void *arg __unused) 3774 { 3775 int error; 3776 3777 error = 0; 3778 3779 switch (cmd) { 3780 case MOD_LOAD : 3781 /* initialize the subsystem */ 3782 error = pmc_initialize(); 3783 if (error != 0) 3784 break; 3785 PMCDBG(MOD,INI,1, "syscall=%d ncpus=%d", 3786 pmc_syscall_num, mp_ncpus); 3787 break; 3788 3789 3790 case MOD_UNLOAD : 3791 case MOD_SHUTDOWN: 3792 pmc_cleanup(); 3793 PMCDBG(MOD,INI,1, "%s", "unloaded"); 3794 break; 3795 3796 default : 3797 error = EINVAL; /* XXX should panic(9) */ 3798 break; 3799 } 3800 3801 return error; 3802 } 3803 3804 /* memory pool */ 3805 MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module"); 3806
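/*
 * A sketch of the module glue that would tie load() into the kernel,
 * assuming the conventional moduledata_t pattern (the actual
 * declarations are not part of this excerpt):
 *
 *	static moduledata_t pmc_mod = { PMC_MODULE_NAME, load, NULL };
 *	DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
 *	MODULE_VERSION(pmc, PMC_VERSION);
 */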