/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#if defined(__x86)
#include <asm/clock.h>
#endif

kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;	/* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;	/* # overflows in a thread with no ctx */

/*
 * Is misbehaviour (overflow in a thread with no context) fatal?
 */
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
static void kcpc_free_configs(kcpc_set_t *set);
static kcpc_ctx_t *kcpc_ctx_alloc(void);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static void kcpc_ctx_free(kcpc_ctx_t *ctx);
static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

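/*
 * Bind a counter set to the given CPU: allocate and configure a context,
 * then program the hardware on that CPU. The calling thread must already
 * be bound to the target CPU; otherwise the set is torn down and EAGAIN
 * is returned.
 */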
int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else
	 * the cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

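/*
 * Bind a counter set to the given thread: allocate a context, configure
 * the requests, install the CPC context ops on the thread, and program the
 * hardware if the target thread is the caller.
 */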
int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);

	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			switch (ret) {
			case CPC_ATTR_REQUIRES_PRIVILEGE:
			case CPC_HV_NO_ACCESS:
				return (EACCES);
			default:
				return (EINVAL);
			}
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

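/*
 * Release any PCBE configurations that have been created for the requests
 * in this set.
 */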
static void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	if (ctx == NULL)
		return (EINVAL);
	else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID)
			return (EAGAIN);
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}

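/*
 * Stop the counters for this set and detach its context from the thread or
 * CPU it was bound to.
 */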
int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	kthread_t	*t;

	if (ctx == NULL)
		return (EINVAL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context.  It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not present on thread %p",
			    ctx, t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

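/*
 * Update the preset (starting value) of the request with the given index in
 * a thread-bound set.
 */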
int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_ctx != NULL);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(ctx != NULL);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER : CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which will
 * be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}

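/*
 * Allocate a new context and add it to the global hash list so that it can
 * be found by kcpc_invalidate_all().
 */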
static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_alloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_flags = 0;
	ctx->kc_vtick = 0;
	ctx->kc_rawtick = 0;
	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_alloc(sizeof (*cks), KM_SLEEP);
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		kcpc_invalidate_config(cctx);
}

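/*
 * Remove the context from the global hash list and free it along with its
 * per-counter (pic) state.
 */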
static void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread.  (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat.  In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong, i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC,
				 * so freeze the context. The interrupt handler
				 * has already stopped the counter hardware.
				 */
				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);
#ifdef N2_1x_CPC_WORKAROUNDS
	/*
	 * Check if any of the supported counters overflowed. If
	 * not, it's a spurious overflow trap (Niagara2 1.x silicon
	 * bug). Ignore this trap.
	 */
	if ((bitmap & ((1 << cpc_ncounters) - 1)) == 0)
		return (DDI_INTR_CLAIMED);
#endif
	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

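/*
 * Called when switching back to the thread owning this context; reprogram
 * the hardware unless the context has been invalidated or frozen.
 */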
static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);

	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE))
		return;

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

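/*
 * Context op invoked when a thread with an inheritable (LWPINHERIT) context
 * creates an LWP; clone the parent's set onto the child and, if SIGOVF is
 * set, arrange for an immediate overflow signal.
 */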
/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Copy the parent context's kc_flags field, but don't overwrite
	 * the child's in case it was modified during kcpc_ctx_clone.
	 */
	cctx->kc_flags |= ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not have
		 * any performance counter context. Stop the counters properly
		 * here so the system isn't surprised by an overflow interrupt
		 * later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cp->cpu_cpc_ctx = NULL;
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave dangling
		 * CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t *ctx;
	long hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Interface for PCBEs to signal that an existing configuration has suddenly
 * become invalid.
 */
void
kcpc_invalidate_config(void *token)
{
	kcpc_ctx_t *ctx = token;

	ASSERT(ctx != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds. Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */

	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
static int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable counter
	 * for _another_ request's event. The solution is to try every possible
	 * request first. Note that this does not cover all solutions, as
	 * that would require all unique orderings of requests, an n^n operation
	 * which would be unacceptable for architectures with many counters.
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}

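/*
 * Attempt one assignment pass over the set, beginning with starting_req.
 * On failure, restore the original picnum assignments from the scratch
 * array and return -1.
 */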
static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the assignments
	 * now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

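/*
 * Create a copy of the given set: requests and attributes are duplicated,
 * while the data store, context, and PCBE configs are left unset.
 */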
kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_alloc(sizeof (*new), KM_SLEEP);
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
		    CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3) < 0 ? -1 : 0);
}