/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#if defined(__x86)
#include <asm/clock.h>
#endif

kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;	/* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;	/* # overflows in a thread with no ctx */

/*
 * Is misbehaviour (overflow in a thread with no context) fatal?
 */
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
static void kcpc_free_configs(kcpc_set_t *set);
static kcpc_ctx_t *kcpc_ctx_alloc(void);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static void kcpc_ctx_free(kcpc_ctx_t *ctx);
static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

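/*
 * Called by a PCBE (Performance Counter BackEnd) module when it is loaded, to
 * register its ops vector and advertise how many counters the hardware has.
 */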
void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

/*
 * Bind a set to a CPU rather than to an LWP; the hardware is programmed on
 * that CPU. The calling thread must itself be bound to the target CPU.
 */
int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else the
	 * cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

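/*
 * Bind a set to an LWP: allocate and configure a context, attach it to the
 * target thread with installctx(), and, if the target is the calling thread,
 * program the hardware immediately.
 */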
int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);


	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			switch (ret) {
			case CPC_ATTR_REQUIRES_PRIVILEGE:
			case CPC_HV_NO_ACCESS:
				return (EACCES);
			default:
				return (EINVAL);
			}
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

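/*
 * Free any PCBE configurations that were created for the set's requests;
 * used to unwind after a failed or partial configuration.
 */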
static void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	if (ctx == NULL)
		return (EINVAL);
	else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID)
			return (EAGAIN);
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}

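/*
 * Unbind a set: invalidate its context, stop the hardware if it is still
 * running, and tear down the thread or CPU association. For thread-bound sets
 * the context is freed later via removectx()/freectx()/kcpc_free(); for a
 * CPU-bound set owned by the caller it is freed here.
 */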
int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	kthread_t	*t;

	if (ctx == NULL)
		return (EINVAL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context.  It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not present on thread %p",
			    ctx, t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the CPU's context lock
		 * here to ensure that the idle thread is done with it. When we
		 * release the lock, the CPU no longer has a context and the
		 * idle thread will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

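/*
 * Set a new preset (starting value) for the request with the given index in a
 * thread-bound set owned by the calling thread.
 */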
int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_ctx != NULL);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

/*
 * Restart the counters of a thread-bound set owned by the calling thread,
 * reloading each counter from its preset.
 */
int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(ctx != NULL);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER : CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which
 * will be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}

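/*
 * For illustration only: a PCBE's program entry point can walk every config
 * handed to it through kcpc_next_config(), loading each counter's saved count
 * before starting it. The names my_pcbe_program, my_config_t and
 * my_program_one below are hypothetical, not part of this interface.
 *
 *	static void
 *	my_pcbe_program(void *token)
 *	{
 *		my_config_t	*cfg = NULL;
 *		uint64_t	*data;
 *
 *		while ((cfg = kcpc_next_config(token, cfg, &data)) != NULL)
 *			my_program_one(cfg, *data);
 *	}
 */
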
/*
 * Allocate a new context and link it onto the global context hash list.
 */
static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_alloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_flags = 0;
	ctx->kc_vtick = 0;
	ctx->kc_rawtick = 0;
	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_alloc(sizeof (*cks), KM_SLEEP);
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		kcpc_invalidate_config(cctx);
}

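/*
 * Unlink the context from its hash bucket and free it, along with its PIC
 * array.
 */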
static void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread. (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow, by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat. In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong, i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC, so
				 * freeze the context. The interrupt handler
				 * has already stopped the counter hardware.
				 */
				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVF_NOTIFY_EMT specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

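/*
 * Called when switching back to the owning thread: reprogram the hardware
 * unless the context has been invalidated or frozen.
 */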
static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);


	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE))
		return;

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

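/*
 * LWP-creation hook: if the parent's context is marked LWPINHERIT, clone its
 * set onto the child and, if SIGOVF is also set, arrange for an immediate
 * overflow signal as the inheritance contract requires.
 */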
/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	/*
	 * Copy the parent context's kc_flags field, but don't overwrite
	 * the child's in case it was modified during kcpc_ctx_clone.
	 */
	cctx->kc_flags |= ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later,
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not have
		 * any performance counter context. Stop the counters properly
		 * here so the system isn't surprised by an overflow interrupt
		 * later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cp->cpu_cpc_ctx = NULL;
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave dangling
		 * CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Interface for PCBEs to signal that an existing configuration has suddenly
 * become invalid.
 */
void
kcpc_invalidate_config(void *token)
{
	kcpc_ctx_t *ctx = token;

	ASSERT(ctx != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds. Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */

	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
static int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable counter
	 * for _another_ request's event. The solution is to retry the walk
	 * starting from each request in turn. Note that this does not cover
	 * all solutions, as that would require all unique orderings of
	 * requests, an n^n operation which would be unacceptable for
	 * architectures with many counters.
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}

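/*
 * Attempt a single assignment pass beginning at starting_req. Returns 0 on
 * success; on failure, restores each request's original counter assignment
 * from the scratch array and returns -1.
 */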
static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the assignments
	 * now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

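/*
 * Duplicate a request set. The copy carries the requests and their attributes
 * but no data store, context, or PCBE configurations; kcpc_enable() uses it to
 * re-bind a set with updated flags.
 */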
static kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_alloc(sizeof (*new), KM_SLEEP);
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
		    CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

/*
 * Report whether the context allows non-privileged (userland) access to the
 * counters.
 */
int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

/*
 * Mark the given thread's context, if any, as invalid.
 */
void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3) < 0 ? -1 : 0);
}