/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#if defined(__x86)
#include <asm/clock.h>
#endif

kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;	/* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;	/* # overflows in a thread with no ctx */

/*
 * Is misbehaviour (overflow in a thread with no context) fatal?
 */
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
static void kcpc_free_configs(kcpc_set_t *set);
static kcpc_ctx_t *kcpc_ctx_alloc(void);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static void kcpc_ctx_free(kcpc_ctx_t *ctx);
static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else
	 * the cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
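	 * (The freeze is cleared at the bottom of this routine, once the
	 * requests have been successfully configured.)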
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is
		 * stopped until we're done here; no need to worry about
		 * preemption or migration here. We still use an atomic op
		 * to clear the flag to ensure the flags are always
		 * self-consistent; they can still be accessed from, for
		 * instance, another CPU doing a kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);


	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int		i;
	int		ret;
	kcpc_request_t	*rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			if (ret == CPC_ATTR_REQUIRES_PRIVILEGE)
				return (EACCES);
			return (EINVAL);
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

static void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
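 * The hardware is only resampled if the set is not frozen and the caller is
 * the thread that bound the context; otherwise the most recently collected
 * values are copied out. For a CPU-bound set the calling thread must
 * additionally still be bound to that CPU, or EAGAIN is returned.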
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	if (ctx == NULL)
		return (EINVAL);
	else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}

int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	kthread_t	*t;

	if (ctx == NULL)
		return (EINVAL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context.  It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not preset on thread %p",
			    ctx, t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
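			 * If it is still present, stop its counters (unless
			 * they have already been stopped) and detach the
			 * context from the cpu_t.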
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_ctx != NULL);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(ctx != NULL);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which
 * will be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}


static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_alloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_flags = 0;
	ctx->kc_vtick = 0;
	ctx->kc_rawtick = 0;
	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
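 * This is invoked from kcpc_lwp_create() when a monitored LWP creates a new
 * LWP: the parent's requests and attributes are duplicated and configured
 * against the child context.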
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_alloc(sizeof (*cks), KM_SLEEP);
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		panic("kcpc_ctx_clone: configure of context %p with "
		    "set %p failed with subcode %d", cctx, cks, code);
}


static void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread.  (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat.
		 * In that case, the bound thread that has the context that
		 * belongs to this CPU is almost certainly sleeping (if it
		 * was running on the CPU we'd have found it above), and the
		 * actual interrupted thread has no knowledge of performance
		 * counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC,
				 * so freeze the context. The interrupt
				 * handler has already stopped the counter
				 * hardware.
				 */
				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t *ctx;
	uint64_t bitmap;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);
#ifdef N2_ERRATUM_134
	/*
	 * Check if any of the supported counters overflowed. If
	 * not, it's a spurious overflow trap (Niagara2 1.x silicon
	 * bug). Ignore this trap.
	 */
	if ((bitmap & ((1 << cpc_ncounters) - 1)) == 0)
		return (DDI_INTR_CLAIMED);
#endif
	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVF_NOTIFY_EMT specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);


	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE))
		return;

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again
 * when it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't
	 * freed while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't
	 * freed while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	cctx->kc_flags = ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send
		 * an overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may
 * need to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters.
 * Later on when the thread is freed, kcpc_free(), called by freectx(), frees
 * the context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated
 * data structures are freed, and that the hardware is passivated if this is
 * an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not
		 * have any performance counter context. Stop the counters
		 * properly here so the system isn't surprised by an overflow
		 * interrupt later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cp->cpu_cpc_ctx = NULL;
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave
		 * dangling CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
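 * Invalidated contexts are torn down lazily by their consumers: kcpc_save()
 * stops the counters and marks the context stopped at the next context
 * switch, kcpc_restore() declines to reprogram an invalid context, and
 * kcpc_sample() returns EAGAIN for one.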
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t *ctx;
	long hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds. Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */

	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
static int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable
	 * counter for _another_ request's event. The solution is to try every
	 * possible request first. Note that this does not cover all
	 * solutions, as that would require all unique orderings of requests,
	 * an n^n operation which would be unacceptable for architectures with
	 * many counters.
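	 * For example, suppose request 0's event can be counted by either of
	 * two counters, while request 1's event can only be counted by
	 * counter 0. Starting with request 0 claims counter 0 for it and
	 * leaves request 1 unplaceable; starting with request 1 places it on
	 * counter 0 and request 0 on counter 1.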
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}

static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the
	 * assignments now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_alloc(sizeof (*new), KM_SLEEP);
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
		    CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3) < 0 ? -1 : 0);
}