/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#if defined(__x86)
#include <asm/clock.h>
#endif

kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */

krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int	kcpc_counts_include_idle = 1;	/* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;	/* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;	/* # overflows in a thread with no ctx */

/*
 * Is misbehaviour (overflow in a thread with no context) fatal?
 */
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif

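/*
 * For illustration: both tunables above are intended to be adjusted from
 * /etc/system. A plausible (hypothetical) configuration would be:
 *
 *	set kcpc_counts_include_idle = 0
 *	set kcpc_nullctx_panic = 1
 */
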
static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
static void kcpc_free_configs(kcpc_set_t *set);
static kcpc_ctx_t *kcpc_ctx_alloc(void);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static void kcpc_ctx_free(kcpc_ctx_t *ctx);
static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else
	 * the cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

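/*
 * Sketch of the CPU-bound life cycle implied by the routines in this file
 * (error handling omitted; the variable names are hypothetical). The caller
 * must already be bound to the target CPU, per the t_bind_cpu check above:
 *
 *	kcpc_set_t *set;	(built from the user's requests)
 *	int subcode;
 *
 *	if (kcpc_bind_cpu(set, cpuid, &subcode) == 0) {
 *		(void) kcpc_sample(set, ubuf, uhrtime, utick);
 *		(void) kcpc_unbind(set);
 *	}
 */
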
int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is
		 * stopped until we're done here; no need to worry about
		 * preemption or migration here. We still use an atomic op
		 * to clear the flag to ensure the flags are always
		 * self-consistent; they can still be accessed from, for
		 * instance, another CPU doing a kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);

	mutex_enter(&set->ks_lock);
	set->ks_state |= KCPC_SET_BOUND;
	cv_signal(&set->ks_condv);
	mutex_exit(&set->ks_lock);

	return (0);
}

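/*
 * Sketch: a caller that wants child LWPs to inherit this set (see
 * kcpc_lwp_create() and kcpc_ctx_clone() below) sets the inherit flag on the
 * set before binding; the names here are hypothetical:
 *
 *	set->ks_flags |= CPC_BIND_LWP_INHERIT;
 *	error = kcpc_bind_thread(set, curthread, &subcode);
 */
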
/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			switch (ret) {
			case CPC_ATTR_REQUIRES_PRIVILEGE:
			case CPC_HV_NO_ACCESS:
				return (EACCES);
			default:
				return (EINVAL);
			}
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

static void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	mutex_enter(&set->ks_lock);
	if ((set->ks_state & KCPC_SET_BOUND) == 0) {
		mutex_exit(&set->ks_lock);
		return (EINVAL);
	}
	mutex_exit(&set->ks_lock);

	if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID)
			return (EAGAIN);
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

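/*
 * Layout note, as implied by the copyout() calls above: the user's buf
 * receives ks_nreqs 64-bit counts, where buf[kr_index] is the value of the
 * request with that kr_index (each kr_data points at ks_data + kr_index),
 * and the hrtime and tick addresses receive kc_hrtime and kc_vtick
 * respectively.
 */
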
/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}

int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t;

	/*
	 * We could be racing with the process's agent thread as it
	 * binds the set; we must wait for the set to finish binding
	 * before attempting to tear it down.
	 */
	mutex_enter(&set->ks_lock);
	while ((set->ks_state & KCPC_SET_BOUND) == 0)
		cv_wait(&set->ks_condv, &set->ks_lock);
	mutex_exit(&set->ks_lock);

	ctx = set->ks_ctx;

	/*
	 * Use kc_lock to synchronize with kcpc_restore().
	 */
	mutex_enter(&ctx->kc_lock);
	ctx->kc_flags |= KCPC_CTX_INVALID;
	mutex_exit(&ctx->kc_lock);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context. It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not present on "
			    "thread %p", (void *)ctx, (void *)t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_state & KCPC_SET_BOUND);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(set->ks_state & KCPC_SET_BOUND);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}

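/*
 * Sketch: a thread-bound consumer re-running its set from fresh starting
 * values combines the two routines above (hypothetical values shown):
 *
 *	(void) kcpc_preset(set, 0, 0);	(reset the request with kr_index 0)
 *	(void) kcpc_restart(set);	(reprogram the hardware and unfreeze)
 */
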
/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which
 * will be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}

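/*
 * Sketch of the iteration protocol described above, as a PCBE might use it
 * while programming the hardware; this illustrates the contract rather than
 * quoting any particular PCBE:
 *
 *	uint64_t *data;
 *	void *cfg;
 *
 *	for (cfg = kcpc_next_config(token, NULL, &data); cfg != NULL;
 *	    cfg = kcpc_next_config(token, cfg, &data)) {
 *		... program one counter from cfg; *data is its data store ...
 *	}
 */
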
static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_zalloc(sizeof (*cks), KM_SLEEP);
	cks->ks_state &= ~KCPC_SET_BOUND;
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		kcpc_invalidate_config(cctx);

	mutex_enter(&cks->ks_lock);
	cks->ks_state |= KCPC_SET_BOUND;
	cv_signal(&cks->ks_condv);
	mutex_exit(&cks->ks_lock);
}

static void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	cv_destroy(&ctx->kc_condv);
	mutex_destroy(&ctx->kc_lock);
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread. (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow, by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat. In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong, i.e., we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC,
				 * so freeze the context. The interrupt
				 * handler has already stopped the counter
				 * hardware.
				 */
				atomic_or_uint(&ctx->kc_flags,
				    KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

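/*
 * The bitmap argument above uses one bit per physical counter: bit i set
 * means pic i overflowed. A bitmap of 0x5, for example, reports overflows
 * on pics 0 and 2, so only those pics' requests are checked for
 * CPC_OVF_NOTIFY_EMT.
 */
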
/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	mutex_enter(&ctx->kc_lock);
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		ctx->kc_flags |= KCPC_CTX_INVALID_STOPPED;

	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
		mutex_exit(&ctx->kc_lock);
		return;
	}

	/*
	 * Set kc_flags to show that a kcpc_restore() is in progress to avoid
	 * ctx & set related memory objects being freed without us knowing.
	 * This can happen if an agent thread is executing a kcpc_unbind(),
	 * with this thread as the target, whilst we're concurrently doing a
	 * restorectx() during, for example, a proc_exit(). Effectively, by
	 * doing this, we're asking kcpc_free() to cv_wait() until
	 * kcpc_restore() has completed.
	 */
	ctx->kc_flags |= KCPC_CTX_RESTORE;
	mutex_exit(&ctx->kc_lock);

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Wake the agent thread if it's waiting in kcpc_free().
	 */
	mutex_enter(&ctx->kc_lock);
	ctx->kc_flags &= ~KCPC_CTX_RESTORE;
	cv_signal(&ctx->kc_condv);
	mutex_exit(&ctx->kc_lock);
}

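/*
 * For reference, kcpc_save() and kcpc_restore() are installed as a ctxop
 * pair (see the installctx() calls in kcpc_bind_thread() and
 * kcpc_lwp_create()): kcpc_save() runs when the bound thread is switched
 * off a CPU, and kcpc_restore() runs when it is switched back on:
 *
 *	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
 *	    kcpc_lwp_create, NULL, kcpc_free);
 */
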
/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't
	 * freed while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't
	 * freed while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Copy the parent context's kc_flags field, but don't overwrite
	 * the child's in case it was modified during kcpc_ctx_clone.
	 */
	cctx->kc_flags |= ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may
 * need to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later
 * on, when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated
 * data structures are freed, and that the hardware is passivated if this is
 * an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	/*
	 * Wait for kcpc_restore() to finish before we tear things down.
	 */
	mutex_enter(&ctx->kc_lock);
	while (ctx->kc_flags & KCPC_CTX_RESTORE)
		cv_wait(&ctx->kc_condv, &ctx->kc_lock);
	ctx->kc_flags |= KCPC_CTX_INVALID;
	mutex_exit(&ctx->kc_lock);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not
		 * have any performance counter context. Stop the counters
		 * properly here so the system isn't surprised by an overflow
		 * interrupt later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cp->cpu_cpc_ctx = NULL;
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave
		 * dangling CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	cv_destroy(&set->ks_condv);
	mutex_destroy(&set->ks_lock);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Interface for PCBEs to signal that an existing configuration has suddenly
 * become invalid.
 */
void
kcpc_invalidate_config(void *token)
{
	kcpc_ctx_t *ctx = token;

	ASSERT(ctx != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds. Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */

	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
static int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable
	 * counter for _another_ request's event. The solution is to retry
	 * the assignment starting from each request in turn. Note that this
	 * does not cover all solutions, as that would require trying all
	 * unique orderings of requests, an n! operation which would be
	 * unacceptable for architectures with many counters.
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}

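/*
 * Worked example of the retry loop above, on hypothetical hardware: suppose
 * counter 0 can count events A and B but counter 1 counts only A. For a set
 * requesting {A, B}, starting from request 0 greedily puts A on counter 0
 * and then finds no home for B; restarting from request 1 puts B on
 * counter 0 and A on counter 1, so the second kcpc_tryassign() succeeds.
 */
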
static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the
	 * assignments now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
	new->ks_state &= ~KCPC_SET_BOUND;
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event,
		    set->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
}
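
/*
 * For illustration (hypothetical IDs): kcpc_pcbe_tryload("ultra", 3, 16, 0)
 * asks modload_qualified() to try "pcbe.ultra.3.16.0" first, falling back
 * toward the least specific name "pcbe.ultra", and returns 0 as soon as one
 * of the candidate modules loads.
 */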