/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#if defined(__x86)
#include <asm/clock.h>
#endif

kmutex_t	kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
kcpc_ctx_t	*kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;    /* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;    /* # overflows in a thread with no ctx */

/*
 * Is misbehaviour (overflow in a thread with no context) fatal?
 */
#ifdef DEBUG
static int kcpc_nullctx_panic = 1;
#else
static int kcpc_nullctx_panic = 0;
#endif

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_free(kcpc_ctx_t *ctx, int isexec);
static int kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode);
static void kcpc_free_configs(kcpc_set_t *set);
static kcpc_ctx_t *kcpc_ctx_alloc(void);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static void kcpc_ctx_free(kcpc_ctx_t *ctx);
static int kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);

void
kcpc_register_pcbe(pcbe_ops_t *ops)
{
	pcbe_ops = ops;
	cpc_ncounters = pcbe_ops->pcbe_ncounters();
}

int
kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode)
{
	cpu_t		*cp;
	kcpc_ctx_t	*ctx;
	int		error;

	ctx = kcpc_ctx_alloc();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = cpuid;
	ctx->kc_thread = curthread;

	set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * We must hold cpu_lock to prevent DR, offlining, or unbinding while
	 * we are manipulating the cpu_t and programming the hardware, else
	 * the cpu_t could go away while we're looking at it.
	 */
	mutex_enter(&cpu_lock);
	cp = cpu_get(cpuid);

	if (cp == NULL)
		/*
		 * The CPU could have been DRd out while we were getting set up.
		 */
		goto unbound;

	mutex_enter(&cp->cpu_cpc_ctxlock);

	if (cp->cpu_cpc_ctx != NULL) {
		/*
		 * If this CPU already has a bound set, return an error.
		 */
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}

	if (curthread->t_bind_cpu != cpuid) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		goto unbound;
	}
	cp->cpu_cpc_ctx = ctx;

	/*
	 * Kernel preemption must be disabled while fiddling with the hardware
	 * registers to prevent partial updates.
	 */
	kpreempt_disable();
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	mutex_exit(&cp->cpu_cpc_ctxlock);
	mutex_exit(&cpu_lock);

	return (0);

unbound:
	mutex_exit(&cpu_lock);
	set->ks_ctx = NULL;
	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	return (EAGAIN);
}

int
kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode)
{
	kcpc_ctx_t	*ctx;
	int		error;

	/*
	 * Only one set is allowed per context, so ensure there is no
	 * existing context.
	 */

	if (t->t_cpc_ctx != NULL)
		return (EEXIST);

	ctx = kcpc_ctx_alloc();

	/*
	 * The context must begin life frozen until it has been properly
	 * programmed onto the hardware. This prevents the context ops from
	 * worrying about it until we're ready.
	 */
	ctx->kc_flags |= KCPC_CTX_FREEZE;
	ctx->kc_hrtime = gethrtime();

	if (kcpc_assign_reqs(set, ctx) != 0) {
		kcpc_ctx_free(ctx);
		*subcode = CPC_RESOURCE_UNAVAIL;
		return (EINVAL);
	}

	ctx->kc_cpuid = -1;
	if (set->ks_flags & CPC_BIND_LWP_INHERIT)
		ctx->kc_flags |= KCPC_CTX_LWPINHERIT;
	ctx->kc_thread = t;
	t->t_cpc_ctx = ctx;
	/*
	 * Permit threads to look at their own hardware counters from userland.
	 */
	ctx->kc_flags |= KCPC_CTX_NONPRIV;

	/*
	 * Create the data store for this set.
	 */
	set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);

	if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) {
		kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
		kcpc_ctx_free(ctx);
		t->t_cpc_ctx = NULL;
		return (error);
	}

	set->ks_ctx = ctx;
	ctx->kc_set = set;

	/*
	 * Add a device context to the subject thread.
	 */
	installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
	    kcpc_lwp_create, NULL, kcpc_free);

	/*
	 * Ask the backend to program the hardware.
	 */
	if (t == curthread) {
		kpreempt_disable();
		ctx->kc_rawtick = KCPC_GET_TICK();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		pcbe_ops->pcbe_program(ctx);
		kpreempt_enable();
	} else
		/*
		 * Since we are the agent LWP, we know the victim LWP is stopped
		 * until we're done here; no need to worry about preemption or
		 * migration here. We still use an atomic op to clear the flag
		 * to ensure the flags are always self-consistent; they can
		 * still be accessed from, for instance, another CPU doing a
		 * kcpc_invalidate_all().
		 */
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);


	return (0);
}

/*
 * Walk through each request in the set and ask the PCBE to configure a
 * corresponding counter.
 */
static int
kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode)
{
	int i;
	int ret;
	kcpc_request_t *rp;

	for (i = 0; i < set->ks_nreqs; i++) {
		int n;
		rp = &set->ks_req[i];

		n = rp->kr_picnum;

		ASSERT(n >= 0 && n < cpc_ncounters);

		ASSERT(ctx->kc_pics[n].kp_req == NULL);

		if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) {
			if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT)
			    == 0) {
				*subcode = -1;
				return (ENOTSUP);
			}
			/*
			 * If any of the counters have requested overflow
			 * notification, we flag the context as being one that
			 * cares about overflow.
			 */
			ctx->kc_flags |= KCPC_CTX_SIGOVF;
		}

		rp->kr_config = NULL;
		if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event,
		    rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr,
		    &(rp->kr_config), (void *)ctx)) != 0) {
			kcpc_free_configs(set);
			*subcode = ret;
			if (ret == CPC_ATTR_REQUIRES_PRIVILEGE)
				return (EACCES);
			return (EINVAL);
		}

		ctx->kc_pics[n].kp_req = rp;
		rp->kr_picp = &ctx->kc_pics[n];
		rp->kr_data = set->ks_data + rp->kr_index;
		*rp->kr_data = rp->kr_preset;
	}

	return (0);
}

static void
kcpc_free_configs(kcpc_set_t *set)
{
	int i;

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
}

/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	if (ctx == NULL)
		return (EINVAL);
	else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}

/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}

int
kcpc_unbind(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	kthread_t	*t;

	if (ctx == NULL)
		return (EINVAL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (ctx->kc_cpuid == -1) {
		t = ctx->kc_thread;
		/*
		 * The context is thread-bound and therefore has a device
		 * context. It will be freed via removectx() calling
		 * freectx() calling kcpc_free().
		 */
		if (t == curthread &&
		    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
		}
#ifdef DEBUG
		if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free) == 0)
			panic("kcpc_unbind: context %p not present on thread %p",
			    ctx, t);
#else
		(void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
		    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
		t->t_cpc_set = NULL;
		t->t_cpc_ctx = NULL;
	} else {
		/*
		 * If we are unbinding a CPU-bound set from a remote CPU, the
		 * native CPU's idle thread could be in the midst of programming
		 * this context onto the CPU. We grab the context's lock here to
		 * ensure that the idle thread is done with it. When we release
		 * the lock, the CPU no longer has a context and the idle thread
		 * will move on.
		 *
		 * cpu_lock must be held to prevent the CPU from being DR'd out
		 * while we disassociate the context from the cpu_t.
		 */
		cpu_t *cp;
		mutex_enter(&cpu_lock);
		cp = cpu_get(ctx->kc_cpuid);
		if (cp != NULL) {
			/*
			 * The CPU may have been DR'd out of the system.
			 */
			mutex_enter(&cp->cpu_cpc_ctxlock);
			if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0)
				kcpc_stop_hw(ctx);
			ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED);
			cp->cpu_cpc_ctx = NULL;
			mutex_exit(&cp->cpu_cpc_ctxlock);
		}
		mutex_exit(&cpu_lock);
		if (ctx->kc_thread == curthread) {
			kcpc_free(ctx, 0);
			curthread->t_cpc_set = NULL;
		}
	}

	return (0);
}

int
kcpc_preset(kcpc_set_t *set, int index, uint64_t preset)
{
	int i;

	ASSERT(set != NULL);
	ASSERT(set->ks_ctx != NULL);
	ASSERT(set->ks_ctx->kc_thread == curthread);
	ASSERT(set->ks_ctx->kc_cpuid == -1);

	if (index < 0 || index >= set->ks_nreqs)
		return (EINVAL);

	for (i = 0; i < set->ks_nreqs; i++)
		if (set->ks_req[i].kr_index == index)
			break;
	ASSERT(i != set->ks_nreqs);

	set->ks_req[i].kr_preset = preset;
	return (0);
}

int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(ctx != NULL);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}

/*
 * Caller must hold kcpc_cpuctx_lock.
 */
int
kcpc_enable(kthread_t *t, int cmd, int enable)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx;
	kcpc_set_t	*set = t->t_cpc_set;
	kcpc_set_t	*newset;
	int		i;
	int		flag;
	int		err;

	ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock));

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a
		 * CPU-bound set.
		 */
		ASSERT(t->t_cpc_set != NULL);
		ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1);
		return (EINVAL);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if (cmd == CPC_ENABLE) {
		if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
			return (EINVAL);
		kpreempt_disable();
		atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
		kcpc_restore(ctx);
		kpreempt_enable();
	} else if (cmd == CPC_DISABLE) {
		if (ctx->kc_flags & KCPC_CTX_FREEZE)
			return (EINVAL);
		kpreempt_disable();
		kcpc_save(ctx);
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
		kpreempt_enable();
	} else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) {
		/*
		 * Strategy for usr/sys: stop counters and update set's presets
		 * with current counter values, unbind, update requests with
		 * new config, then re-bind.
		 */
		flag = (cmd == CPC_USR_EVENTS) ?
		    CPC_COUNT_USER: CPC_COUNT_SYSTEM;

		kpreempt_disable();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
		pcbe_ops->pcbe_allstop();
		kpreempt_enable();
		for (i = 0; i < set->ks_nreqs; i++) {
			set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data);
			if (enable)
				set->ks_req[i].kr_flags |= flag;
			else
				set->ks_req[i].kr_flags &= ~flag;
		}
		newset = kcpc_dup_set(set);
		if (kcpc_unbind(set) != 0)
			return (EINVAL);
		t->t_cpc_set = newset;
		if (kcpc_bind_thread(newset, t, &err) != 0) {
			t->t_cpc_set = NULL;
			kcpc_free_set(newset);
			return (EINVAL);
		}
	} else
		return (EINVAL);

	return (0);
}

/*
 * Provide PCBEs with a way of obtaining the configs of every counter which will
 * be programmed together.
 *
 * If current is NULL, provide the first config.
 *
 * If data != NULL, caller wants to know where the data store associated with
 * the config we return is located.
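 *
 * An illustrative traversal (a sketch for orientation only, not taken from
 * any particular PCBE): a backend that was handed 'token' as the last
 * argument of its pcbe_configure() entry point could walk the configs with
 *
 *	uint64_t *data;
 *	void *cfg;
 *
 *	for (cfg = kcpc_next_config(token, NULL, &data); cfg != NULL;
 *	    cfg = kcpc_next_config(token, cfg, &data)) {
 *		... examine cfg and, if interested, *data ...
 *	}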
 */
void *
kcpc_next_config(void *token, void *current, uint64_t **data)
{
	int		i;
	kcpc_pic_t	*pic;
	kcpc_ctx_t	*ctx = (kcpc_ctx_t *)token;

	if (current == NULL) {
		/*
		 * Client would like the first config, which may not be in
		 * counter 0; we need to search through the counters for the
		 * first config.
		 */
		for (i = 0; i < cpc_ncounters; i++)
			if (ctx->kc_pics[i].kp_req != NULL)
				break;
		/*
		 * There are no counters configured for the given context.
		 */
		if (i == cpc_ncounters)
			return (NULL);
	} else {
		/*
		 * There surely is a faster way to do this.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];

			if (pic->kp_req != NULL &&
			    current == pic->kp_req->kr_config)
				break;
		}

		/*
		 * We found the current config at picnum i. Now search for the
		 * next configured PIC.
		 */
		for (i++; i < cpc_ncounters; i++) {
			pic = &ctx->kc_pics[i];
			if (pic->kp_req != NULL)
				break;
		}

		if (i == cpc_ncounters)
			return (NULL);
	}

	if (data != NULL) {
		*data = ctx->kc_pics[i].kp_req->kr_data;
	}

	return (ctx->kc_pics[i].kp_req->kr_config);
}


static kcpc_ctx_t *
kcpc_ctx_alloc(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	ctx = (kcpc_ctx_t *)kmem_alloc(sizeof (kcpc_ctx_t), KM_SLEEP);

	hash = CPC_HASH_CTX(ctx);
	mutex_enter(&kcpc_ctx_llock[hash]);
	ctx->kc_next = kcpc_ctx_list[hash];
	kcpc_ctx_list[hash] = ctx;
	mutex_exit(&kcpc_ctx_llock[hash]);

	ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) *
	    cpc_ncounters, KM_SLEEP);

	ctx->kc_flags = 0;
	ctx->kc_vtick = 0;
	ctx->kc_rawtick = 0;
	ctx->kc_cpuid = -1;

	return (ctx);
}

/*
 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT
 * in the flags.
 */
static void
kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx)
{
	kcpc_set_t	*ks = ctx->kc_set, *cks;
	int		i, j;
	int		code;

	ASSERT(ks != NULL);

	if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0)
		return;

	cks = kmem_alloc(sizeof (*cks), KM_SLEEP);
	cctx->kc_set = cks;
	cks->ks_flags = ks->ks_flags;
	cks->ks_nreqs = ks->ks_nreqs;
	cks->ks_req = kmem_alloc(cks->ks_nreqs *
	    sizeof (kcpc_request_t), KM_SLEEP);
	cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t),
	    KM_SLEEP);
	cks->ks_ctx = cctx;

	for (i = 0; i < cks->ks_nreqs; i++) {
		cks->ks_req[i].kr_index = ks->ks_req[i].kr_index;
		cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum;
		(void) strncpy(cks->ks_req[i].kr_event,
		    ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN);
		cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset;
		cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags;
		cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs;
		if (ks->ks_req[i].kr_nattrs > 0) {
			cks->ks_req[i].kr_attr =
			    kmem_alloc(ks->ks_req[i].kr_nattrs *
			    sizeof (kcpc_attr_t), KM_SLEEP);
		}
		for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) {
			(void) strncpy(cks->ks_req[i].kr_attr[j].ka_name,
			    ks->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
			cks->ks_req[i].kr_attr[j].ka_val =
			    ks->ks_req[i].kr_attr[j].ka_val;
		}
	}
	if (kcpc_configure_reqs(cctx, cks, &code) != 0)
		panic("kcpc_ctx_clone: configure of context %p with "
		    "set %p failed with subcode %d", cctx, cks, code);
}


static void
kcpc_ctx_free(kcpc_ctx_t *ctx)
{
	kcpc_ctx_t	**loc;
	long		hash = CPC_HASH_CTX(ctx);

	mutex_enter(&kcpc_ctx_llock[hash]);
	loc = &kcpc_ctx_list[hash];
	ASSERT(*loc != NULL);
	while (*loc != ctx)
		loc = &(*loc)->kc_next;
	*loc = ctx->kc_next;
	mutex_exit(&kcpc_ctx_llock[hash]);

	kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t));
	kmem_free(ctx, sizeof (*ctx));
}

/*
 * Generic interrupt handler used on hardware that generates
 * overflow interrupts.
 *
 * Note: executed at high-level interrupt context!
 */
/*ARGSUSED*/
kcpc_ctx_t *
kcpc_overflow_intr(caddr_t arg, uint64_t bitmap)
{
	kcpc_ctx_t	*ctx;
	kthread_t	*t = curthread;
	int		i;

	/*
	 * On both x86 and UltraSPARC, we may deliver the high-level
	 * interrupt in kernel mode, just after we've started to run an
	 * interrupt thread. (That's because the hardware helpfully
	 * delivers the overflow interrupt some random number of cycles
	 * after the instruction that caused the overflow by which time
	 * we're in some part of the kernel, not necessarily running on
	 * the right thread).
	 *
	 * Check for this case here -- find the pinned thread
	 * that was running when the interrupt went off.
	 */
	if (t->t_flag & T_INTR_THREAD) {
		klwp_t *lwp;

		atomic_add_32(&kcpc_intrctx_count, 1);

		/*
		 * Note that t_lwp is always set to point at the underlying
		 * thread, thus this will work in the presence of nested
		 * interrupts.
		 */
		ctx = NULL;
		if ((lwp = t->t_lwp) != NULL) {
			t = lwptot(lwp);
			ctx = t->t_cpc_ctx;
		}
	} else
		ctx = t->t_cpc_ctx;

	if (ctx == NULL) {
		/*
		 * This can easily happen if we're using the counters in
		 * "shared" mode, for example, and an overflow interrupt
		 * occurs while we are running cpustat. In that case, the
		 * bound thread that has the context that belongs to this
		 * CPU is almost certainly sleeping (if it was running on
		 * the CPU we'd have found it above), and the actual
		 * interrupted thread has no knowledge of performance counters!
		 */
		ctx = curthread->t_cpu->cpu_cpc_ctx;
		if (ctx != NULL) {
			/*
			 * Return the bound context for this CPU to
			 * the interrupt handler so that it can synchronously
			 * sample the hardware counters and restart them.
			 */
			return (ctx);
		}

		/*
		 * As long as the overflow interrupt really is delivered early
		 * enough after trapping into the kernel to avoid switching
		 * threads, we must always be able to find the cpc context,
		 * or something went terribly wrong i.e. we ended up
		 * running a passivated interrupt thread, a kernel
		 * thread or we interrupted idle, all of which are Very Bad.
		 */
		if (kcpc_nullctx_panic)
			panic("null cpc context, thread %p", (void *)t);
		atomic_add_32(&kcpc_nullctx_count, 1);
	} else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
		/*
		 * Schedule an ast to sample the counters, which will
		 * propagate any overflow into the virtualized performance
		 * counter(s), and may deliver a signal.
		 */
		ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		/*
		 * If a counter has overflowed which was counting on behalf of
		 * a request which specified CPC_OVF_NOTIFY_EMT, send the
		 * process a signal.
		 */
		for (i = 0; i < cpc_ncounters; i++) {
			if (ctx->kc_pics[i].kp_req != NULL &&
			    bitmap & (1 << i) &&
			    ctx->kc_pics[i].kp_req->kr_flags &
			    CPC_OVF_NOTIFY_EMT) {
				/*
				 * A signal has been requested for this PIC,
				 * so freeze the context. The interrupt handler
				 * has already stopped the counter hardware.
				 */
				atomic_or_uint(&ctx->kc_flags, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	}
	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t	*ctx;
	uint64_t	bitmap;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	/*
	 * Invoke the "generic" handler.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 */
	if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) {
		uint64_t curtick = KCPC_GET_TICK();

		ctx->kc_hrtime = gethrtime_waitfree();
		ctx->kc_vtick += curtick - ctx->kc_rawtick;
		ctx->kc_rawtick = curtick;
		pcbe_ops->pcbe_sample(ctx);
		pcbe_ops->pcbe_program(ctx);
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}

/*
 * Called when switching away from current thread.
 */
static void
kcpc_save(kcpc_ctx_t *ctx)
{
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)
			return;
		/*
		 * This context has been invalidated but the counters have not
		 * been stopped. Stop them here and mark the context stopped.
		 */
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
		return;
	}

	pcbe_ops->pcbe_allstop();
	if (ctx->kc_flags & KCPC_CTX_FREEZE)
		return;

	/*
	 * Need to sample for all reqs into each req's current mpic.
	 */
	ctx->kc_hrtime = gethrtime();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);
}

static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID)
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);


	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE))
		return;

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */

/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_allstop();
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

/*ARGSUSED*/
static void
kcpc_lwp_create(kthread_t *t, kthread_t *ct)
{
	kcpc_ctx_t	*ctx = t->t_cpc_ctx, *cctx;
	int		i;

	if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0)
		return;

	rw_enter(&kcpc_cpuctx_lock, RW_READER);
	if (ctx->kc_flags & KCPC_CTX_INVALID) {
		rw_exit(&kcpc_cpuctx_lock);
		return;
	}
	cctx = kcpc_ctx_alloc();
	kcpc_ctx_clone(ctx, cctx);
	rw_exit(&kcpc_cpuctx_lock);

	cctx->kc_flags = ctx->kc_flags;
	cctx->kc_thread = ct;
	cctx->kc_cpuid = -1;
	ct->t_cpc_set = cctx->kc_set;
	ct->t_cpc_ctx = cctx;

	if (cctx->kc_flags & KCPC_CTX_SIGOVF) {
		kcpc_set_t *ks = cctx->kc_set;
		/*
		 * Our contract with the user requires us to immediately send an
		 * overflow signal to all children if we have the LWPINHERIT
		 * and SIGOVF flags set. In addition, all counters should be
		 * set to UINT64_MAX, and their pic's overflow flag turned on
		 * so that our trap() processing knows to send a signal.
		 */
		atomic_or_uint(&cctx->kc_flags, KCPC_CTX_FREEZE);
		for (i = 0; i < ks->ks_nreqs; i++) {
			kcpc_request_t *kr = &ks->ks_req[i];

			if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) {
				*(kr->kr_data) = UINT64_MAX;
				kr->kr_picp->kp_flags |= KCPC_PIC_OVERFLOWED;
			}
		}
		ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
		aston(ct);
	}

	installctx(ct, cctx, kcpc_save, kcpc_restore,
	    NULL, kcpc_lwp_create, NULL, kcpc_free);
}

/*
 * Counter Stoppage Theory
 *
 * The counters may need to be stopped properly at the following occasions:
 *
 * 1) An LWP exits.
 * 2) A thread exits.
 * 3) An LWP performs an exec().
 * 4) A bound set is unbound.
 *
 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need
 * to be freed as well.
 *
 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on
 * when the thread is freed, kcpc_free(), called by freectx(), frees the
 * context.
 *
 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit().
 *
 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has
 * been called from exec. It stops the counters _and_ frees the context.
 *
 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context.
 *
 * CPU-bound counters are always stopped via kcpc_unbind().
 */

/*
 * We're being called to delete the context; we ensure that all associated data
 * structures are freed, and that the hardware is passivated if this is an exec.
 */

/*ARGSUSED*/
static void
kcpc_free(kcpc_ctx_t *ctx, int isexec)
{
	int		i;
	kcpc_set_t	*set = ctx->kc_set;

	ASSERT(set != NULL);

	atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);

	if (isexec) {
		/*
		 * This thread is execing, and after the exec it should not have
		 * any performance counter context. Stop the counters properly
		 * here so the system isn't surprised by an overflow interrupt
		 * later.
		 */
		if (ctx->kc_cpuid != -1) {
			cpu_t *cp;
			/*
			 * CPU-bound context; stop the appropriate CPU's ctrs.
			 * Hold cpu_lock while examining the CPU to ensure it
			 * doesn't go away.
			 */
			mutex_enter(&cpu_lock);
			cp = cpu_get(ctx->kc_cpuid);
			/*
			 * The CPU could have been DR'd out, so only stop the
			 * CPU and clear its context pointer if the CPU still
			 * exists.
			 */
			if (cp != NULL) {
				mutex_enter(&cp->cpu_cpc_ctxlock);
				kcpc_stop_hw(ctx);
				cp->cpu_cpc_ctx = NULL;
				mutex_exit(&cp->cpu_cpc_ctxlock);
			}
			mutex_exit(&cpu_lock);
			ASSERT(curthread->t_cpc_ctx == NULL);
		} else {
			/*
			 * Thread-bound context; stop _this_ CPU's counters.
			 */
			kpreempt_disable();
			pcbe_ops->pcbe_allstop();
			atomic_or_uint(&ctx->kc_flags,
			    KCPC_CTX_INVALID_STOPPED);
			kpreempt_enable();
			curthread->t_cpc_ctx = NULL;
		}

		/*
		 * Since we are being called from an exec and we know that
		 * exec is not permitted via the agent thread, we should clean
		 * up this thread's CPC state completely, and not leave dangling
		 * CPC pointers behind.
		 */
		ASSERT(ctx->kc_thread == curthread);
		curthread->t_cpc_set = NULL;
	}

	/*
	 * Walk through each request in this context's set and free the PCBE's
	 * configuration if it exists.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		if (set->ks_req[i].kr_config != NULL)
			pcbe_ops->pcbe_free(set->ks_req[i].kr_config);
	}

	kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t));
	kcpc_ctx_free(ctx);
	kcpc_free_set(set);
}

/*
 * Free the memory associated with a request set.
 */
void
kcpc_free_set(kcpc_set_t *set)
{
	int		i;
	kcpc_request_t	*req;

	ASSERT(set->ks_req != NULL);

	for (i = 0; i < set->ks_nreqs; i++) {
		req = &set->ks_req[i];

		if (req->kr_nattrs != 0) {
			kmem_free(req->kr_attr,
			    req->kr_nattrs * sizeof (kcpc_attr_t));
		}
	}

	kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);
	kmem_free(set, sizeof (kcpc_set_t));
}

/*
 * Grab every existing context and mark it as invalid.
 */
void
kcpc_invalidate_all(void)
{
	kcpc_ctx_t	*ctx;
	long		hash;

	for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) {
		mutex_enter(&kcpc_ctx_llock[hash]);
		for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next)
			atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
		mutex_exit(&kcpc_ctx_llock[hash]);
	}
}

/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);
		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds. Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */

	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}

/*
 * Assign the requests in the given set to the PICs in the context.
 * Returns 0 if successful, -1 on failure.
 */
/*ARGSUSED*/
static int
kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx)
{
	int i;
	int *picnum_save;

	ASSERT(set->ks_nreqs <= cpc_ncounters);

	/*
	 * Provide kcpc_tryassign() with scratch space to avoid doing an
	 * alloc/free with every invocation.
	 */
	picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP);
	/*
	 * kcpc_tryassign() blindly walks through each request in the set,
	 * seeing if a counter can count its event. If yes, it assigns that
	 * counter. However, that counter may have been the only capable counter
	 * for _another_ request's event. The solution is to try every possible
	 * request first. Note that this does not cover all solutions, as
	 * that would require all unique orderings of requests, an n^n operation
	 * which would be unacceptable for architectures with many counters.
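	 *
	 * A small illustrative case (not part of the original comment): if
	 * request A's event is countable only on counter 0 while request B's
	 * event is countable on counters 0 and 1, starting the walk at B
	 * assigns B to counter 0 and leaves A with no usable counter;
	 * starting at A instead yields A on counter 0 and B on counter 1.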
	 */
	for (i = 0; i < set->ks_nreqs; i++)
		if (kcpc_tryassign(set, i, picnum_save) == 0)
			break;

	kmem_free(picnum_save, set->ks_nreqs * sizeof (int));
	if (i == set->ks_nreqs)
		return (-1);
	return (0);
}

static int
kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch)
{
	int		i;
	int		j;
	uint64_t	bitmap = 0, resmap = 0;
	uint64_t	ctrmap;

	/*
	 * We are attempting to assign the reqs to pics, but we may fail. If we
	 * fail, we need to restore the state of the requests to what it was
	 * when we found it, as some reqs may have been explicitly assigned to
	 * a specific PIC beforehand. We do this by snapshotting the assignments
	 * now and restoring from it later if we fail.
	 *
	 * Also we note here which counters have already been claimed by
	 * requests with explicit counter assignments.
	 */
	for (i = 0; i < set->ks_nreqs; i++) {
		scratch[i] = set->ks_req[i].kr_picnum;
		if (set->ks_req[i].kr_picnum != -1)
			resmap |= (1 << set->ks_req[i].kr_picnum);
	}

	/*
	 * Walk through requests assigning them to the first PIC that is
	 * capable.
	 */
	i = starting_req;
	do {
		if (set->ks_req[i].kr_picnum != -1) {
			ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0);
			bitmap |= (1 << set->ks_req[i].kr_picnum);
			if (++i == set->ks_nreqs)
				i = 0;
			continue;
		}

		ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event);
		for (j = 0; j < cpc_ncounters; j++) {
			if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 &&
			    (resmap & (1 << j)) == 0) {
				/*
				 * We can assign this counter because:
				 *
				 * 1. It can count the event (ctrmap)
				 * 2. It hasn't been assigned yet (bitmap)
				 * 3. It wasn't reserved by a request (resmap)
				 */
				bitmap |= (1 << j);
				break;
			}
		}
		if (j == cpc_ncounters) {
			for (i = 0; i < set->ks_nreqs; i++)
				set->ks_req[i].kr_picnum = scratch[i];
			return (-1);
		}
		set->ks_req[i].kr_picnum = j;

		if (++i == set->ks_nreqs)
			i = 0;
	} while (i != starting_req);

	return (0);
}

kcpc_set_t *
kcpc_dup_set(kcpc_set_t *set)
{
	kcpc_set_t	*new;
	int		i;
	int		j;

	new = kmem_alloc(sizeof (*new), KM_SLEEP);
	new->ks_flags = set->ks_flags;
	new->ks_nreqs = set->ks_nreqs;
	new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t),
	    KM_SLEEP);
	new->ks_data = NULL;
	new->ks_ctx = NULL;

	for (i = 0; i < new->ks_nreqs; i++) {
		new->ks_req[i].kr_config = NULL;
		new->ks_req[i].kr_index = set->ks_req[i].kr_index;
		new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum;
		new->ks_req[i].kr_picp = NULL;
		new->ks_req[i].kr_data = NULL;
		(void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event,
		    CPC_MAX_EVENT_LEN);
		new->ks_req[i].kr_preset = set->ks_req[i].kr_preset;
		new->ks_req[i].kr_flags = set->ks_req[i].kr_flags;
		new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs;
		new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs *
		    sizeof (kcpc_attr_t), KM_SLEEP);
		for (j = 0; j < new->ks_req[i].kr_nattrs; j++) {
			new->ks_req[i].kr_attr[j].ka_val =
			    set->ks_req[i].kr_attr[j].ka_val;
			(void) strncpy(new->ks_req[i].kr_attr[j].ka_name,
			    set->ks_req[i].kr_attr[j].ka_name,
			    CPC_MAX_ATTR_LEN);
		}
	}

	return (new);
}

int
kcpc_allow_nonpriv(void *token)
{
	return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV);
}

void
kcpc_invalidate(kthread_t *t)
{
	kcpc_ctx_t *ctx = t->t_cpc_ctx;

	if (ctx != NULL)
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID);
}

/*
 * Given a PCBE ID, attempt to load a matching PCBE module. The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
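 *
 * For example (hypothetical values, purely for illustration): a call with
 * prefix "foo" and IDs 1, 2 and 3 attempts to load "pcbe.foo.1.2.3", then
 * "pcbe.foo.1.2", then "pcbe.foo.1", and finally "pcbe.foo".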
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	char modname[PCBE_NAMELEN];
	char stub[PCBE_NAMELEN];

	if (prefix != NULL)
		(void) snprintf(stub, PCBE_NAMELEN, "pcbe.%s", prefix);
	else
		(void) snprintf(stub, PCBE_NAMELEN, "pcbe");

	(void) snprintf(modname, PCBE_NAMELEN, "%s.%u.%u.%u",
	    stub, first, second, third);

	DTRACE_PROBE1(kcpc__pcbe__spec, char *, modname);

	if (modload("pcbe", modname) >= 0)
		return (0);

	(void) snprintf(modname, PCBE_NAMELEN, "%s.%u.%u",
	    stub, first, second);
	if (modload("pcbe", modname) >= 0)
		return (0);

	(void) snprintf(modname, PCBE_NAMELEN, "%s.%u", stub, first);
	if (modload("pcbe", modname) >= 0)
		return (0);

	if (prefix == NULL)
		/*
		 * If no prefix was given, we have tried all possible
		 * PCBE names.
		 */
		return (-1);

	(void) snprintf(modname, PCBE_NAMELEN, "%s", stub);
	if (modload("pcbe", modname) >= 0)
		return (0);

	return (-1);
}