/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#if defined(__x86)
#include <asm/clock.h>
#endif

kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;	/* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;	/* # overflows in a thread with no ctx */

/*
 * By setting 'kcpc_nullctx_panic' to 1, any overflow interrupts in a thread
 * with no valid context will result in a panic.
 */
static int kcpc_nullctx_panic = 0;

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

void
kcpc_register_dcpc(void (*func)(uint64_t))
{
	dtrace_cpc_fire = func;
}

void
kcpc_unregister_dcpc(void)
{
	dtrace_cpc_fire = NULL;
}

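/*
 * Bind a request set to a specific CPU: allocate a context, have the PCBE
 * configure each request, and program the counters on that CPU. The caller
 * must already be bound to the target CPU, and the CPU must not already
 * have a bound set; otherwise we bail out and return EAGAIN.
 */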
int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else the
	 * cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

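/*
 * Bind a request set to a thread (LWP). The new context is installed via
 * installctx() so the counters follow the thread across context switches
 * and, if CPC_BIND_LWP_INHERIT was requested, onto any LWPs it later
 * creates. The hardware is programmed immediately only when the target is
 * curthread; on the agent-LWP path we simply thaw the context and let
 * kcpc_restore() program it when the victim next runs.
 */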
int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);

	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			switch (ret) {
			case CPC_ATTR_REQUIRES_PRIVILEGE:
			case CPC_HV_NO_ACCESS:
				return (EACCES);
			default:
				return (EINVAL);
			}
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	mutex_enter(&set->ks_lock);
	if ((set->ks_state & KCPC_SET_BOUND) == 0) {
		mutex_exit(&set->ks_lock);
		return (EINVAL);
	}
	mutex_exit(&set->ks_lock);

	if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID)
			return (EAGAIN);
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}

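/*
 * Tear down a bound set. The context is invalidated first; for a
 * thread-bound set the counters are stopped (if we own them) and the
 * context ops are removed, leaving freectx()/kcpc_free() to release the
 * context itself. For a CPU-bound set the counters are stopped on the
 * target CPU under cpu_lock and the cpu_t's context pointer is cleared.
 */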
int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t;

	/*
	 * We could be racing with the process's agent thread as it
	 * binds the set; we must wait for the set to finish binding
	 * before attempting to tear it down.
	 */
	mutex_enter(&set->ks_lock);
	while ((set->ks_state & KCPC_SET_BOUND) == 0)
		cv_wait(&set->ks_condv, &set->ks_lock);
	mutex_exit(&set->ks_lock);

	ctx = set->ks_ctx;

	/*
	 * Use kc_lock to synchronize with kcpc_restore().
	 */
	mutex_enter(&ctx->kc_lock);
	ctx->kc_flags |= KCPC_CTX_INVALID;
	mutex_exit(&ctx->kc_lock);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context.  It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not present on thread %p",
			    (void *)ctx, (void *)t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

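/*
 * Record a new preset (starting value) for the request with the given index
 * in a thread-bound set owned by the calling thread. The new preset takes
 * effect when kcpc_restart() below rewinds the counters to their presets
 * and reprograms the hardware.
 */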
int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_state & KCPC_SET_BOUND);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(set->ks_state & KCPC_SET_BOUND);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which will
 * be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}

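/*
 * An illustrative (not normative) sketch of how a PCBE might walk the
 * configurations handed out by kcpc_next_config() above while programming
 * a context:
 *
 *	uint64_t *data;
 *	void *cfg;
 *
 *	for (cfg = kcpc_next_config(token, NULL, &data); cfg != NULL;
 *	    cfg = kcpc_next_config(token, cfg, &data)) {
 *		... program the counter described by cfg, seeding it from
 *		... *data if the backend virtualizes 64-bit counts
 *	}
 */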

kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_zalloc(sizeof (*cks), KM_SLEEP);
	cks->ks_state &= ~KCPC_SET_BOUND;
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		kcpc_invalidate_config(cctx);

	mutex_enter(&cks->ks_lock);
	cks->ks_state |= KCPC_SET_BOUND;
	cv_signal(&cks->ks_condv);
	mutex_exit(&cks->ks_lock);
}

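/*
 * Unlink a context from the global hash list and free it along with its
 * PIC array. The per-request configurations and the set's data store are
 * the caller's responsibility (see kcpc_free()).
 */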
void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	cv_destroy(&ctx->kc_condv);
	mutex_destroy(&ctx->kc_lock);
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread.  (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat.  In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 *
		 * We also could end up here owing to an incredibly unlikely
		 * race condition that exists on x86 based architectures when
		 * the cpc provider is in use; overflow interrupts are directed
		 * to the cpc provider if the 'dtrace_cpc_in_use' variable is
		 * set when we enter the handler. This variable is unset after
		 * overflow interrupts have been disabled on all CPUs and all
		 * contexts have been torn down. To stop interrupts, the cpc
		 * provider issues a xcall to the remote CPU before it tears
		 * down that CPU's context. As high priority xcalls, on an x86
		 * architecture, execute at a higher PIL than this handler, it
		 * is possible (though extremely unlikely) that the xcall could
		 * interrupt the overflow handler before the handler has
		 * checked the 'dtrace_cpc_in_use' variable, stop the counters,
		 * return to the cpc provider which could then rip down
		 * contexts and unset 'dtrace_cpc_in_use' *before* the CPU's
		 * overflow handler has had a chance to check the variable. In
		 * that case, the handler would direct the overflow into this
		 * code and no valid context will be found. The default behavior
		 * when no valid context is found is now to shout a warning to
		 * the console and bump the 'kcpc_nullctx_count' variable.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);

		cmn_err(CE_WARN,
		    "null cpc context found in overflow handler!\n");
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC, so
				 * freeze the context. The interrupt handler
				 * has already stopped the counter hardware.
				 */
				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;
	uint8_t		*state;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	if (dtrace_cpc_in_use) {
		state = &cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state;

		/*
		 * Set the per-CPU state bit to indicate that we are currently
		 * processing an interrupt if it is currently free. Drop the
		 * interrupt if the state isn't free (i.e. a configuration
		 * event is taking place).
		 */
		if (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_PROCESSING) == DCPC_INTR_FREE) {
			int i;
			kcpc_request_t req;

			ASSERT(dtrace_cpc_fire != NULL);

			(*dtrace_cpc_fire)(bitmap);

			ctx = curthread->t_cpu->cpu_cpc_ctx;

			/* Reset any counters that have overflowed */
			for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
				req = ctx->kc_set->ks_req[i];

				if (bitmap & (1 << req.kr_picnum)) {
					pcbe_ops->pcbe_configure(req.kr_picnum,
					    req.kr_event, req.kr_preset,
					    req.kr_flags, req.kr_nattrs,
					    req.kr_attr, &(req.kr_config),
					    (void *)ctx);
				}
			}
			pcbe_ops->pcbe_program(ctx);

			/*
			 * We've finished processing the interrupt so set
			 * the state back to free.
			 */
			cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
			membar_producer();
		}
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * DTrace isn't involved so pass on accordingly.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVF_NOTIFY_EMT specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

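/*
 * Called when switching back onto a thread that owns a context: reprogram
 * the hardware unless the context has been invalidated or frozen in the
 * meantime. The KCPC_CTX_RESTORE flag and kc_condv synchronize this with a
 * concurrent kcpc_free(); see the comments below.
 */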
static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	mutex_enter(&ctx->kc_lock);
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		ctx->kc_flags |= KCPC_CTX_INVALID_STOPPED;


	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
		mutex_exit(&ctx->kc_lock);
		return;
	}

	/*
	 * Set kc_flags to show that a kcpc_restore() is in progress to avoid
	 * ctx & set related memory objects being freed without us knowing.
	 * This can happen if an agent thread is executing a kcpc_unbind(),
	 * with this thread as the target, whilst we're concurrently doing a
	 * restorectx() during, for example, a proc_exit().  Effectively, by
	 * doing this, we're asking kcpc_free() to cv_wait() until
	 * kcpc_restore() has completed.
	 */
	ctx->kc_flags |= KCPC_CTX_RESTORE;
	mutex_exit(&ctx->kc_lock);

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Wake the agent thread if it's waiting in kcpc_free().
	 */
	mutex_enter(&ctx->kc_lock);
	ctx->kc_flags &= ~KCPC_CTX_RESTORE;
	cv_signal(&ctx->kc_condv);
	mutex_exit(&ctx->kc_lock);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

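/*
 * Fork/lwp-create hook, installed via installctx(): if the parent's context
 * has KCPC_CTX_LWPINHERIT set and has not been invalidated, clone the set
 * onto the new LWP and install the same context ops on it. If SIGOVF was
 * also requested, the child starts out frozen with the notifying counters
 * preset to UINT64_MAX so that it receives an overflow signal immediately.
 */
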
/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Copy the parent context's kc_flags field, but don't overwrite
	 * the child's in case it was modified during kcpc_ctx_clone.
	 */
	cctx->kc_flags |= ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	/*
	 * Wait for kcpc_restore() to finish before we tear things down.
	 */
	mutex_enter(&ctx->kc_lock);
	while (ctx->kc_flags & KCPC_CTX_RESTORE)
		cv_wait(&ctx->kc_condv, &ctx->kc_lock);
	ctx->kc_flags |= KCPC_CTX_INVALID;
	mutex_exit(&ctx->kc_lock);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not have
		 * any performance counter context. Stop the counters properly
		 * here so the system isn't surprised by an overflow interrupt
		 * later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cp->cpu_cpc_ctx = NULL;
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave dangling
		 * CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	cv_destroy(&set->ks_condv);
	mutex_destroy(&set->ks_lock);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t *ctx;
	long hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Interface for PCBEs to signal that an existing configuration has suddenly
 * become invalid.
 */
void
kcpc_invalidate_config(void *token)
{
	kcpc_ctx_t *ctx = token;

	ASSERT(ctx != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds.  Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */

	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable counter
	 * for _another_ request's event. The solution is to try every possible
	 * request first. Note that this does not cover all solutions, as
	 * that would require all unique orderings of requests, an n! operation
	 * which would be unacceptable for architectures with many counters.
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}

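/*
 * For example, suppose counter 0 is the only counter able to count event A
 * while either counter can count event B.  If kcpc_tryassign() starts with
 * the request for B it may claim counter 0 and leave A unassignable; the
 * retry that starts with the request for A assigns A to counter 0 and B to
 * counter 1.
 */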
static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the assignments
	 * now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

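/*
 * Make an unbound deep copy of a set: the requests, event names and
 * attributes are duplicated, while the data store, configs and context
 * pointers start out empty.  Used by kcpc_enable() when switching a bound
 * set between user and system counting.
 */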
kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
	new->ks_state &= ~KCPC_SET_BOUND;
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
		    CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
}

char *
kcpc_list_attrs(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_attrs());
}

char *
kcpc_list_events(uint_t pic)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_events(pic));
}

uint_t
kcpc_pcbe_capabilities(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_caps);
}

int
kcpc_pcbe_loaded(void)
{
	return (pcbe_ops == NULL ? -1 : 0);
}