1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/param.h> 30 #include <sys/thread.h> 31 #include <sys/cpuvar.h> 32 #include <sys/inttypes.h> 33 #include <sys/cmn_err.h> 34 #include <sys/time.h> 35 #include <sys/ksynch.h> 36 #include <sys/systm.h> 37 #include <sys/kcpc.h> 38 #include <sys/cpc_impl.h> 39 #include <sys/cpc_pcbe.h> 40 #include <sys/atomic.h> 41 #include <sys/sunddi.h> 42 #include <sys/modctl.h> 43 #include <sys/sdt.h> 44 #if defined(__x86) 45 #include <asm/clock.h> 46 #endif 47 48 kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS]; /* protects ctx_list */ 49 kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS]; /* head of list */ 50 51 52 krwlock_t kcpc_cpuctx_lock; /* lock for 'kcpc_cpuctx' below */ 53 int kcpc_cpuctx; /* number of cpu-specific contexts */ 54 55 int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */ 56 57 /* 58 * These are set when a PCBE module is loaded. 
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t	kcpc_intrctx_count;    /* # overflows in an interrupt handler */
static uint32_t	kcpc_nullctx_count;    /* # overflows in a thread with no ctx */

/*
 * Is misbehaviour (overflow in a thread with no context) fatal?
 */
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
static void kcpc_free_configs(kcpc_set_t *set);
static kcpc_ctx_t *kcpc_ctx_alloc(void);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static void kcpc_ctx_free(kcpc_ctx_t *ctx);
static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

/*
 * Called by a PCBE (Performance Counter BackEnd) module at load time to
 * publish its ops vector and the number of hardware counters it manages.
 */
void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

/*
 * Bind a request set to a specific CPU: allocate a context, configure the
 * requests, attach the context to the cpu_t and program the hardware.
 * Returns 0 on success; EINVAL/EAGAIN (with *subcode detail) on failure.
 * The calling thread must already be bound to the target CPU.
 */
int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	/* One 64-bit virtualized counter value per request. */
	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else
	 * the cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		/* Caller must be thread-bound to the target CPU. */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	/* Mark the set bound and wake anyone waiting in kcpc_unbind(). */
	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

/*
 * Bind a request set to an LWP (thread t); the counters are virtualized
 * across context switches via the installed context operators. Returns 0
 * on success, EEXIST if the thread already has a context, or an errno with
 * *subcode detail on configuration failure.
 */
int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;	/* -1 marks a thread-bound (not CPU) context */
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);

	/* Mark the set bound and wake anyone waiting in kcpc_unbind(). */
	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			/* Undo any configs built so far before bailing out. */
			kcpc_free_configs(set);
			*subcode = ret;
			switch (ret) {
			case CPC_ATTR_REQUIRES_PRIVILEGE:
			case CPC_HV_NO_ACCESS:
				return (EACCES);
			default:
				return (EINVAL);
			}
		}

		/* Cross-link request, pic, and its slot in the data store. */
		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

/*
 * Release every PCBE-allocated config in the set.
 */
static void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	/* The set must have finished binding before it can be sampled. */
	mutex_enter(&set->ks_lock);
	if ((set->ks_state & KCPC_SET_BOUND) == 0) {
		mutex_exit(&set->ks_lock);
		return (EINVAL);
	}
	mutex_exit(&set->ks_lock);

	if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			/* CPU-bound set: caller must be bound to that CPU. */
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			/* Fold the live hardware counts into ks_data. */
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID)
			return (EAGAIN);
	}

	/* Copy counts, timestamp, and virtualized tick count to userland. */
	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
407 */ 408 static void 409 kcpc_stop_hw(kcpc_ctx_t *ctx) 410 { 411 cpu_t *cp; 412 413 ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) 414 == KCPC_CTX_INVALID); 415 416 kpreempt_disable(); 417 418 cp = cpu_get(ctx->kc_cpuid); 419 ASSERT(cp != NULL); 420 421 if (cp == CPU) { 422 pcbe_ops->pcbe_allstop(); 423 atomic_or_uint(&ctx->kc_flags, 424 KCPC_CTX_INVALID_STOPPED); 425 } else 426 kcpc_remote_stop(cp); 427 kpreempt_enable(); 428 } 429 430 int 431 kcpc_unbind(kcpc_set_t *set) 432 { 433 kcpc_ctx_t *ctx; 434 kthread_t *t; 435 436 /* 437 * We could be racing with the process's agent thread as it 438 * binds the set; we must wait for the set to finish binding 439 * before attempting to tear it down. 440 */ 441 mutex_enter(&set->ks_lock); 442 while ((set->ks_state & KCPC_SET_BOUND) == 0) 443 cv_wait(&set->ks_condv, &set->ks_lock); 444 mutex_exit(&set->ks_lock); 445 446 ctx = set->ks_ctx; 447 448 /* 449 * Use kc_lock to synchronize with kcpc_restore(). 450 */ 451 mutex_enter(&ctx->kc_lock); 452 ctx->kc_flags |= KCPC_CTX_INVALID; 453 mutex_exit(&ctx->kc_lock); 454 455 if (ctx->kc_cpuid == -1) { 456 t = ctx->kc_thread; 457 /* 458 * The context is thread-bound and therefore has a device 459 * context. It will be freed via removectx() calling 460 * freectx() calling kcpc_free(). 
461 */ 462 if (t == curthread && 463 (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) { 464 kpreempt_disable(); 465 pcbe_ops->pcbe_allstop(); 466 atomic_or_uint(&ctx->kc_flags, 467 KCPC_CTX_INVALID_STOPPED); 468 kpreempt_enable(); 469 } 470 #ifdef DEBUG 471 if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL, 472 kcpc_lwp_create, NULL, kcpc_free) == 0) 473 panic("kcpc_unbind: context %p not preset on thread %p", 474 ctx, t); 475 #else 476 (void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL, 477 kcpc_lwp_create, NULL, kcpc_free); 478 #endif /* DEBUG */ 479 t->t_cpc_set = NULL; 480 t->t_cpc_ctx = NULL; 481 } else { 482 /* 483 * If we are unbinding a CPU-bound set from a remote CPU, the 484 * native CPU's idle thread could be in the midst of programming 485 * this context onto the CPU. We grab the context's lock here to 486 * ensure that the idle thread is done with it. When we release 487 * the lock, the CPU no longer has a context and the idle thread 488 * will move on. 489 * 490 * cpu_lock must be held to prevent the CPU from being DR'd out 491 * while we disassociate the context from the cpu_t. 492 */ 493 cpu_t *cp; 494 mutex_enter(&cpu_lock); 495 cp = cpu_get(ctx->kc_cpuid); 496 if (cp != NULL) { 497 /* 498 * The CPU may have been DR'd out of the system. 
499 */ 500 mutex_enter(&cp->cpu_cpc_ctxlock); 501 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) 502 kcpc_stop_hw(ctx); 503 ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED); 504 cp->cpu_cpc_ctx = NULL; 505 mutex_exit(&cp->cpu_cpc_ctxlock); 506 } 507 mutex_exit(&cpu_lock); 508 if (ctx->kc_thread == curthread) { 509 kcpc_free(ctx, 0); 510 curthread->t_cpc_set = NULL; 511 } 512 } 513 514 return (0); 515 } 516 517 int 518 kcpc_preset(kcpc_set_t *set, int index, uint64_t preset) 519 { 520 int i; 521 522 ASSERT(set != NULL); 523 ASSERT(set->ks_state & KCPC_SET_BOUND); 524 ASSERT(set->ks_ctx->kc_thread == curthread); 525 ASSERT(set->ks_ctx->kc_cpuid == -1); 526 527 if (index < 0 || index >= set->ks_nreqs) 528 return (EINVAL); 529 530 for (i = 0; i < set->ks_nreqs; i++) 531 if (set->ks_req[i].kr_index == index) 532 break; 533 ASSERT(i != set->ks_nreqs); 534 535 set->ks_req[i].kr_preset = preset; 536 return (0); 537 } 538 539 int 540 kcpc_restart(kcpc_set_t *set) 541 { 542 kcpc_ctx_t *ctx = set->ks_ctx; 543 int i; 544 545 ASSERT(set->ks_state & KCPC_SET_BOUND); 546 ASSERT(ctx->kc_thread == curthread); 547 ASSERT(ctx->kc_cpuid == -1); 548 549 kpreempt_disable(); 550 551 /* 552 * If the user is doing this on a running set, make sure the counters 553 * are stopped first. 554 */ 555 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) 556 pcbe_ops->pcbe_allstop(); 557 558 for (i = 0; i < set->ks_nreqs; i++) { 559 *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset; 560 pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset, 561 0, 0, NULL, &set->ks_req[i].kr_config, NULL); 562 } 563 564 /* 565 * Ask the backend to program the hardware. 566 */ 567 ctx->kc_rawtick = KCPC_GET_TICK(); 568 atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE); 569 pcbe_ops->pcbe_program(ctx); 570 kpreempt_enable(); 571 572 return (0); 573 } 574 575 /* 576 * Caller must hold kcpc_cpuctx_lock. 
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		/* Unfreeze and reprogram the thread's counters. */
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		/* Sample current values, then freeze the context. */
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			/* Carry current counts forward as the new presets. */
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which will
 * be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}


/*
 * Allocate a zeroed context, link it onto its hash chain, and give it a
 * pic array sized for every counter the PCBE exposes.
 */
static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_zalloc(sizeof (*cks), KM_SLEEP);
	cks->ks_state &= ~KCPC_SET_BOUND;
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	/* Deep-copy every request, including its attribute array. */
	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		kcpc_invalidate_config(cctx);

	/* Mark the cloned set bound and wake any waiter. */
	mutex_enter(&cks->ks_lock);
	cks->ks_state |= KCPC_SET_BOUND;
	cv_signal(&cks->ks_condv);
	mutex_exit(&cks->ks_lock);
}


/*
 * Unlink a context from its hash chain and free it along with its pics.
 */
static void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	cv_destroy(&ctx->kc_condv);
	mutex_destroy(&ctx->kc_lock);
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread. (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat. In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 *
		 * NOTE(review): "1 << i" is an int shift; if cpc_ncounters
		 * can exceed 32 this should be 1ULL << i to match the
		 * 64-bit bitmap -- confirm against supported PCBEs.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC,
				 * so freeze the context. The interrupt handler
				 * has already stopped the counter hardware.
				 */
				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_hw_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler. Returns nonzero if a signal should be sent.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

/*
 * Called when switching onto the current thread: reprogram the hardware
 * from the saved context unless it has been invalidated or frozen.
 */
static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	mutex_enter(&ctx->kc_lock);
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		ctx->kc_flags |= KCPC_CTX_INVALID_STOPPED;


	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
		mutex_exit(&ctx->kc_lock);
		return;
	}

	/*
	 * Set kc_flags to show that a kcpc_restore() is in progress to avoid
	 * ctx & set related memory objects being freed without us knowing.
	 * This can happen if an agent thread is executing a kcpc_unbind(),
	 * with this thread as the target, whilst we're concurrently doing a
	 * restorectx() during, for example, a proc_exit().  Effectively, by
	 * doing this, we're asking kcpc_free() to cv_wait() until
	 * kcpc_restore() has completed.
	 */
	ctx->kc_flags |= KCPC_CTX_RESTORE;
	mutex_exit(&ctx->kc_lock);

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Wake the agent thread if it's waiting in kcpc_free().
	 */
	mutex_enter(&ctx->kc_lock);
	ctx->kc_flags &= ~KCPC_CTX_RESTORE;
	cv_signal(&ctx->kc_condv);
	mutex_exit(&ctx->kc_lock);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	/* Idle thread is leaving the CPU: restart the counters. */
	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	/* Idle thread is coming onto the CPU: stop the counters. */
	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

/*
 * Context operator: clone the parent's CPC context onto a newly-created
 * LWP when the parent's context has KCPC_CTX_LWPINHERIT set.
 */
/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Copy the parent context's kc_flags field, but don't overwrite
	 * the child's in case it was modified during kcpc_ctx_clone.
	 */
	cctx->kc_flags |= ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	/*
	 * Wait for kcpc_restore() to finish before we tear things down.
1256 */ 1257 mutex_enter(&ctx->kc_lock); 1258 while (ctx->kc_flags & KCPC_CTX_RESTORE) 1259 cv_wait(&ctx->kc_condv, &ctx->kc_lock); 1260 ctx->kc_flags |= KCPC_CTX_INVALID; 1261 mutex_exit(&ctx->kc_lock); 1262 1263 if (isexec) { 1264 /* 1265 * This thread is execing, and after the exec it should not have 1266 * any performance counter context. Stop the counters properly 1267 * here so the system isn't surprised by an overflow interrupt 1268 * later. 1269 */ 1270 if (ctx->kc_cpuid != -1) { 1271 cpu_t *cp; 1272 /* 1273 * CPU-bound context; stop the appropriate CPU's ctrs. 1274 * Hold cpu_lock while examining the CPU to ensure it 1275 * doesn't go away. 1276 */ 1277 mutex_enter(&cpu_lock); 1278 cp = cpu_get(ctx->kc_cpuid); 1279 /* 1280 * The CPU could have been DR'd out, so only stop the 1281 * CPU and clear its context pointer if the CPU still 1282 * exists. 1283 */ 1284 if (cp != NULL) { 1285 mutex_enter(&cp->cpu_cpc_ctxlock); 1286 kcpc_stop_hw(ctx); 1287 cp->cpu_cpc_ctx = NULL; 1288 mutex_exit(&cp->cpu_cpc_ctxlock); 1289 } 1290 mutex_exit(&cpu_lock); 1291 ASSERT(curthread->t_cpc_ctx == NULL); 1292 } else { 1293 /* 1294 * Thread-bound context; stop _this_ CPU's counters. 1295 */ 1296 kpreempt_disable(); 1297 pcbe_ops->pcbe_allstop(); 1298 atomic_or_uint(&ctx->kc_flags, 1299 KCPC_CTX_INVALID_STOPPED); 1300 kpreempt_enable(); 1301 curthread->t_cpc_ctx = NULL; 1302 } 1303 1304 /* 1305 * Since we are being called from an exec and we know that 1306 * exec is not permitted via the agent thread, we should clean 1307 * up this thread's CPC state completely, and not leave dangling 1308 * CPC pointers behind. 1309 */ 1310 ASSERT(ctx->kc_thread == curthread); 1311 curthread->t_cpc_set = NULL; 1312 } 1313 1314 /* 1315 * Walk through each request in this context's set and free the PCBE's 1316 * configuration if it exists. 
1317 */ 1318 for (i = 0; i < set->ks_nreqs; i++) { 1319 if (set->ks_req[i].kr_config != NULL) 1320 pcbe_ops->pcbe_free(set->ks_req[i].kr_config); 1321 } 1322 1323 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 1324 kcpc_ctx_free(ctx); 1325 kcpc_free_set(set); 1326 } 1327 1328 /* 1329 * Free the memory associated with a request set. 1330 */ 1331 void 1332 kcpc_free_set(kcpc_set_t *set) 1333 { 1334 int i; 1335 kcpc_request_t *req; 1336 1337 ASSERT(set->ks_req != NULL); 1338 1339 for (i = 0; i < set->ks_nreqs; i++) { 1340 req = &set->ks_req[i]; 1341 1342 if (req->kr_nattrs != 0) { 1343 kmem_free(req->kr_attr, 1344 req->kr_nattrs * sizeof (kcpc_attr_t)); 1345 } 1346 } 1347 1348 kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs); 1349 cv_destroy(&set->ks_condv); 1350 mutex_destroy(&set->ks_lock); 1351 kmem_free(set, sizeof (kcpc_set_t)); 1352 } 1353 1354 /* 1355 * Grab every existing context and mark it as invalid. 1356 */ 1357 void 1358 kcpc_invalidate_all(void) 1359 { 1360 kcpc_ctx_t *ctx; 1361 long hash; 1362 1363 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) { 1364 mutex_enter(&kcpc_ctx_llock[hash]); 1365 for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next) 1366 atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID); 1367 mutex_exit(&kcpc_ctx_llock[hash]); 1368 } 1369 } 1370 1371 /* 1372 * Interface for PCBEs to signal that an existing configuration has suddenly 1373 * become invalid. 1374 */ 1375 void 1376 kcpc_invalidate_config(void *token) 1377 { 1378 kcpc_ctx_t *ctx = token; 1379 1380 ASSERT(ctx != NULL); 1381 1382 atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID); 1383 } 1384 1385 /* 1386 * Called from lwp_exit() and thread_exit() 1387 */ 1388 void 1389 kcpc_passivate(void) 1390 { 1391 kcpc_ctx_t *ctx = curthread->t_cpc_ctx; 1392 kcpc_set_t *set = curthread->t_cpc_set; 1393 1394 if (set == NULL) 1395 return; 1396 1397 /* 1398 * We're cleaning up after this thread; ensure there are no dangling 1399 * CPC pointers left behind. 
The context and set will be freed by 1400 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in 1401 * the case of a CPU-bound set. 1402 */ 1403 curthread->t_cpc_ctx = NULL; 1404 1405 if (ctx == NULL) { 1406 /* 1407 * This thread has a set but no context; it must be a CPU-bound 1408 * set. The hardware will be stopped via kcpc_unbind() when the 1409 * process exits and closes its file descriptors with 1410 * kcpc_close(). Our only job here is to clean up this thread's 1411 * state; the set will be freed with the unbind(). 1412 */ 1413 (void) kcpc_unbind(set); 1414 /* 1415 * Unbinding a set belonging to the current thread should clear 1416 * its set pointer. 1417 */ 1418 ASSERT(curthread->t_cpc_set == NULL); 1419 return; 1420 } 1421 1422 curthread->t_cpc_set = NULL; 1423 1424 /* 1425 * This thread/LWP is exiting but context switches will continue to 1426 * happen for a bit as the exit proceeds. Kernel preemption must be 1427 * disabled here to prevent a race between checking or setting the 1428 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during 1429 * a context switch. 1430 */ 1431 1432 kpreempt_disable(); 1433 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) { 1434 pcbe_ops->pcbe_allstop(); 1435 atomic_or_uint(&ctx->kc_flags, 1436 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED); 1437 } 1438 kpreempt_enable(); 1439 } 1440 1441 /* 1442 * Assign the requests in the given set to the PICs in the context. 1443 * Returns 0 if successful, -1 on failure. 1444 */ 1445 /*ARGSUSED*/ 1446 static int 1447 kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx) 1448 { 1449 int i; 1450 int *picnum_save; 1451 1452 ASSERT(set->ks_nreqs <= cpc_ncounters); 1453 1454 /* 1455 * Provide kcpc_tryassign() with scratch space to avoid doing an 1456 * alloc/free with every invocation. 
1457 */ 1458 picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP); 1459 /* 1460 * kcpc_tryassign() blindly walks through each request in the set, 1461 * seeing if a counter can count its event. If yes, it assigns that 1462 * counter. However, that counter may have been the only capable counter 1463 * for _another_ request's event. The solution is to try every possible 1464 * request first. Note that this does not cover all solutions, as 1465 * that would require all unique orderings of requests, an n^n operation 1466 * which would be unacceptable for architectures with many counters. 1467 */ 1468 for (i = 0; i < set->ks_nreqs; i++) 1469 if (kcpc_tryassign(set, i, picnum_save) == 0) 1470 break; 1471 1472 kmem_free(picnum_save, set->ks_nreqs * sizeof (int)); 1473 if (i == set->ks_nreqs) 1474 return (-1); 1475 return (0); 1476 } 1477 1478 static int 1479 kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch) 1480 { 1481 int i; 1482 int j; 1483 uint64_t bitmap = 0, resmap = 0; 1484 uint64_t ctrmap; 1485 1486 /* 1487 * We are attempting to assign the reqs to pics, but we may fail. If we 1488 * fail, we need to restore the state of the requests to what it was 1489 * when we found it, as some reqs may have been explicitly assigned to 1490 * a specific PIC beforehand. We do this by snapshotting the assignments 1491 * now and restoring from it later if we fail. 1492 * 1493 * Also we note here which counters have already been claimed by 1494 * requests with explicit counter assignments. 1495 */ 1496 for (i = 0; i < set->ks_nreqs; i++) { 1497 scratch[i] = set->ks_req[i].kr_picnum; 1498 if (set->ks_req[i].kr_picnum != -1) 1499 resmap |= (1 << set->ks_req[i].kr_picnum); 1500 } 1501 1502 /* 1503 * Walk through requests assigning them to the first PIC that is 1504 * capable. 
1505 */ 1506 i = starting_req; 1507 do { 1508 if (set->ks_req[i].kr_picnum != -1) { 1509 ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0); 1510 bitmap |= (1 << set->ks_req[i].kr_picnum); 1511 if (++i == set->ks_nreqs) 1512 i = 0; 1513 continue; 1514 } 1515 1516 ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event); 1517 for (j = 0; j < cpc_ncounters; j++) { 1518 if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 && 1519 (resmap & (1 << j)) == 0) { 1520 /* 1521 * We can assign this counter because: 1522 * 1523 * 1. It can count the event (ctrmap) 1524 * 2. It hasn't been assigned yet (bitmap) 1525 * 3. It wasn't reserved by a request (resmap) 1526 */ 1527 bitmap |= (1 << j); 1528 break; 1529 } 1530 } 1531 if (j == cpc_ncounters) { 1532 for (i = 0; i < set->ks_nreqs; i++) 1533 set->ks_req[i].kr_picnum = scratch[i]; 1534 return (-1); 1535 } 1536 set->ks_req[i].kr_picnum = j; 1537 1538 if (++i == set->ks_nreqs) 1539 i = 0; 1540 } while (i != starting_req); 1541 1542 return (0); 1543 } 1544 1545 kcpc_set_t * 1546 kcpc_dup_set(kcpc_set_t *set) 1547 { 1548 kcpc_set_t *new; 1549 int i; 1550 int j; 1551 1552 new = kmem_zalloc(sizeof (*new), KM_SLEEP); 1553 new->ks_state &= ~KCPC_SET_BOUND; 1554 new->ks_flags = set->ks_flags; 1555 new->ks_nreqs = set->ks_nreqs; 1556 new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t), 1557 KM_SLEEP); 1558 new->ks_data = NULL; 1559 new->ks_ctx = NULL; 1560 1561 for (i = 0; i < new->ks_nreqs; i++) { 1562 new->ks_req[i].kr_config = NULL; 1563 new->ks_req[i].kr_index = set->ks_req[i].kr_index; 1564 new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum; 1565 new->ks_req[i].kr_picp = NULL; 1566 new->ks_req[i].kr_data = NULL; 1567 (void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event, 1568 CPC_MAX_EVENT_LEN); 1569 new->ks_req[i].kr_preset = set->ks_req[i].kr_preset; 1570 new->ks_req[i].kr_flags = set->ks_req[i].kr_flags; 1571 new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs; 1572 new->ks_req[i].kr_attr = 
kmem_alloc(new->ks_req[i].kr_nattrs * 1573 sizeof (kcpc_attr_t), KM_SLEEP); 1574 for (j = 0; j < new->ks_req[i].kr_nattrs; j++) { 1575 new->ks_req[i].kr_attr[j].ka_val = 1576 set->ks_req[i].kr_attr[j].ka_val; 1577 (void) strncpy(new->ks_req[i].kr_attr[j].ka_name, 1578 set->ks_req[i].kr_attr[j].ka_name, 1579 CPC_MAX_ATTR_LEN); 1580 } 1581 } 1582 1583 return (new); 1584 } 1585 1586 int 1587 kcpc_allow_nonpriv(void *token) 1588 { 1589 return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV); 1590 } 1591 1592 void 1593 kcpc_invalidate(kthread_t *t) 1594 { 1595 kcpc_ctx_t *ctx = t->t_cpc_ctx; 1596 1597 if (ctx != NULL) 1598 atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID); 1599 } 1600 1601 /* 1602 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given 1603 * are used to construct PCBE names, starting with the most specific, 1604 * "pcbe.first.second.third.fourth" and ending with the least specific, 1605 * "pcbe.first". 1606 * 1607 * Returns 0 if a PCBE was successfully loaded and -1 upon error. 1608 */ 1609 int 1610 kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third) 1611 { 1612 uint_t s[3]; 1613 1614 s[0] = first; 1615 s[1] = second; 1616 s[2] = third; 1617 1618 return (modload_qualified("pcbe", 1619 "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0); 1620 } 1621