/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/promif.h>
#include <sys/x_call.h>
#include <sys/cap_util.h>
#if defined(__x86)
#include <asm/clock.h>
#include <sys/xc_levels.h>
#endif

static kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
static kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t	kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int		kcpc_cpuctx;		/* number of cpu-specific contexts */

int		kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t		cpc_ncounters = 0;
pcbe_ops_t	*pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;    /* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;    /* # overflows in a thread with no ctx */

/*
 * By setting 'kcpc_nullctx_panic' to 1, any overflow interrupts in a thread
 * with no valid context will result in a panic.
 */
static int kcpc_nullctx_panic = 0;

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
static kcpc_set_t *kcpc_set_create(kcpc_request_t *reqs, int nreqs,
    int set_flags, int kmem_flags);

/*
 * Macros to manipulate context flags. All flag updates should use one of these
 * two macros.
 *
 * Flags should always be updated atomically since some of the updates are
 * not protected by locks.
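 *
 * For example (illustrative only), invalidating a context is written as
 *
 *	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
 *
 * rather than "ctx->kc_flags |= KCPC_CTX_INVALID", so that a concurrent flag
 * update from, say, kcpc_invalidate_all() running on another CPU cannot be
 * lost.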
92 */ 93 #define KCPC_CTX_FLAG_SET(ctx, flag) atomic_or_uint(&(ctx)->kc_flags, (flag)) 94 #define KCPC_CTX_FLAG_CLR(ctx, flag) atomic_and_uint(&(ctx)->kc_flags, ~(flag)) 95 96 /* 97 * The IS_HIPIL() macro verifies that the code is executed either from a 98 * cross-call or from high-PIL interrupt 99 */ 100 #ifdef DEBUG 101 #define IS_HIPIL() (getpil() >= XCALL_PIL) 102 #else 103 #define IS_HIPIL() 104 #endif /* DEBUG */ 105 106 107 extern int kcpc_hw_load_pcbe(void); 108 109 /* 110 * Return value from kcpc_hw_load_pcbe() 111 */ 112 static int kcpc_pcbe_error = 0; 113 114 /* 115 * Perform one-time initialization of kcpc framework. 116 * This function performs the initialization only the first time it is called. 117 * It is safe to call it multiple times. 118 */ 119 int 120 kcpc_init(void) 121 { 122 long hash; 123 static uint32_t kcpc_initialized = 0; 124 125 /* 126 * We already tried loading platform pcbe module and failed 127 */ 128 if (kcpc_pcbe_error != 0) 129 return (-1); 130 131 /* 132 * The kcpc framework should be initialized at most once 133 */ 134 if (atomic_cas_32(&kcpc_initialized, 0, 1) != 0) 135 return (0); 136 137 rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL); 138 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) 139 mutex_init(&kcpc_ctx_llock[hash], 140 NULL, MUTEX_DRIVER, (void *)(uintptr_t)15); 141 142 /* 143 * Load platform-specific pcbe module 144 */ 145 kcpc_pcbe_error = kcpc_hw_load_pcbe(); 146 147 return (kcpc_pcbe_error == 0 ? 0 : -1); 148 } 149 150 void 151 kcpc_register_pcbe(pcbe_ops_t *ops) 152 { 153 pcbe_ops = ops; 154 cpc_ncounters = pcbe_ops->pcbe_ncounters(); 155 } 156 157 void 158 kcpc_register_dcpc(void (*func)(uint64_t)) 159 { 160 dtrace_cpc_fire = func; 161 } 162 163 void 164 kcpc_unregister_dcpc(void) 165 { 166 dtrace_cpc_fire = NULL; 167 } 168 169 int 170 kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode) 171 { 172 cpu_t *cp; 173 kcpc_ctx_t *ctx; 174 int error; 175 int save_spl; 176 177 ctx = kcpc_ctx_alloc(KM_SLEEP); 178 179 if (kcpc_assign_reqs(set, ctx) != 0) { 180 kcpc_ctx_free(ctx); 181 *subcode = CPC_RESOURCE_UNAVAIL; 182 return (EINVAL); 183 } 184 185 ctx->kc_cpuid = cpuid; 186 ctx->kc_thread = curthread; 187 188 set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP); 189 190 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) { 191 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 192 kcpc_ctx_free(ctx); 193 return (error); 194 } 195 196 set->ks_ctx = ctx; 197 ctx->kc_set = set; 198 199 /* 200 * We must hold cpu_lock to prevent DR, offlining, or unbinding while 201 * we are manipulating the cpu_t and programming the hardware, else the 202 * the cpu_t could go away while we're looking at it. 203 */ 204 mutex_enter(&cpu_lock); 205 cp = cpu_get(cpuid); 206 207 if (cp == NULL) 208 /* 209 * The CPU could have been DRd out while we were getting set up. 210 */ 211 goto unbound; 212 213 mutex_enter(&cp->cpu_cpc_ctxlock); 214 kpreempt_disable(); 215 save_spl = spl_xcall(); 216 217 /* 218 * Check to see whether counters for CPU already being used by someone 219 * other than kernel for capacity and utilization (since kernel will 220 * let go of counters for user in kcpc_program() below) 221 */ 222 if (cp->cpu_cpc_ctx != NULL && !CU_CPC_ON(cp)) { 223 /* 224 * If this CPU already has a bound set, return an error. 
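		 * The error travels back through the "unbound" path below,
		 * which frees the half-built context and data store and
		 * reports EAGAIN, so the caller can tell a transient conflict
		 * apart from an invalid request (which returns EINVAL earlier
		 * in this routine).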
225 */ 226 splx(save_spl); 227 kpreempt_enable(); 228 mutex_exit(&cp->cpu_cpc_ctxlock); 229 goto unbound; 230 } 231 232 if (curthread->t_bind_cpu != cpuid) { 233 splx(save_spl); 234 kpreempt_enable(); 235 mutex_exit(&cp->cpu_cpc_ctxlock); 236 goto unbound; 237 } 238 239 kcpc_program(ctx, B_FALSE, B_TRUE); 240 241 splx(save_spl); 242 kpreempt_enable(); 243 244 mutex_exit(&cp->cpu_cpc_ctxlock); 245 mutex_exit(&cpu_lock); 246 247 mutex_enter(&set->ks_lock); 248 set->ks_state |= KCPC_SET_BOUND; 249 cv_signal(&set->ks_condv); 250 mutex_exit(&set->ks_lock); 251 252 return (0); 253 254 unbound: 255 mutex_exit(&cpu_lock); 256 set->ks_ctx = NULL; 257 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 258 kcpc_ctx_free(ctx); 259 return (EAGAIN); 260 } 261 262 int 263 kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode) 264 { 265 kcpc_ctx_t *ctx; 266 int error; 267 268 /* 269 * Only one set is allowed per context, so ensure there is no 270 * existing context. 271 */ 272 273 if (t->t_cpc_ctx != NULL) 274 return (EEXIST); 275 276 ctx = kcpc_ctx_alloc(KM_SLEEP); 277 278 /* 279 * The context must begin life frozen until it has been properly 280 * programmed onto the hardware. This prevents the context ops from 281 * worrying about it until we're ready. 282 */ 283 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE); 284 ctx->kc_hrtime = gethrtime(); 285 286 if (kcpc_assign_reqs(set, ctx) != 0) { 287 kcpc_ctx_free(ctx); 288 *subcode = CPC_RESOURCE_UNAVAIL; 289 return (EINVAL); 290 } 291 292 ctx->kc_cpuid = -1; 293 if (set->ks_flags & CPC_BIND_LWP_INHERIT) 294 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_LWPINHERIT); 295 ctx->kc_thread = t; 296 t->t_cpc_ctx = ctx; 297 /* 298 * Permit threads to look at their own hardware counters from userland. 299 */ 300 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_NONPRIV); 301 302 /* 303 * Create the data store for this set. 304 */ 305 set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP); 306 307 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) { 308 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 309 kcpc_ctx_free(ctx); 310 t->t_cpc_ctx = NULL; 311 return (error); 312 } 313 314 set->ks_ctx = ctx; 315 ctx->kc_set = set; 316 317 /* 318 * Add a device context to the subject thread. 319 */ 320 installctx(t, ctx, kcpc_save, kcpc_restore, NULL, 321 kcpc_lwp_create, NULL, kcpc_free); 322 323 /* 324 * Ask the backend to program the hardware. 325 */ 326 if (t == curthread) { 327 int save_spl; 328 329 kpreempt_disable(); 330 save_spl = spl_xcall(); 331 kcpc_program(ctx, B_TRUE, B_TRUE); 332 splx(save_spl); 333 kpreempt_enable(); 334 } else { 335 /* 336 * Since we are the agent LWP, we know the victim LWP is stopped 337 * until we're done here; no need to worry about preemption or 338 * migration here. We still use an atomic op to clear the flag 339 * to ensure the flags are always self-consistent; they can 340 * still be accessed from, for instance, another CPU doing a 341 * kcpc_invalidate_all(). 342 */ 343 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 344 } 345 346 mutex_enter(&set->ks_lock); 347 set->ks_state |= KCPC_SET_BOUND; 348 cv_signal(&set->ks_condv); 349 mutex_exit(&set->ks_lock); 350 351 return (0); 352 } 353 354 /* 355 * Walk through each request in the set and ask the PCBE to configure a 356 * corresponding counter. 
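 *
 * On failure, any configurations built so far are released again via
 * kcpc_free_configs() and the PCBE's error code is handed back through
 * *subcode so the caller can report it alongside the errno we return.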
357 */ 358 int 359 kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode) 360 { 361 int i; 362 int ret; 363 kcpc_request_t *rp; 364 365 for (i = 0; i < set->ks_nreqs; i++) { 366 int n; 367 rp = &set->ks_req[i]; 368 369 n = rp->kr_picnum; 370 371 ASSERT(n >= 0 && n < cpc_ncounters); 372 373 ASSERT(ctx->kc_pics[n].kp_req == NULL); 374 375 if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) { 376 if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT) 377 == 0) { 378 *subcode = -1; 379 return (ENOTSUP); 380 } 381 /* 382 * If any of the counters have requested overflow 383 * notification, we flag the context as being one that 384 * cares about overflow. 385 */ 386 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_SIGOVF); 387 } 388 389 rp->kr_config = NULL; 390 if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event, 391 rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr, 392 &(rp->kr_config), (void *)ctx)) != 0) { 393 kcpc_free_configs(set); 394 *subcode = ret; 395 switch (ret) { 396 case CPC_ATTR_REQUIRES_PRIVILEGE: 397 case CPC_HV_NO_ACCESS: 398 return (EACCES); 399 default: 400 return (EINVAL); 401 } 402 } 403 404 ctx->kc_pics[n].kp_req = rp; 405 rp->kr_picp = &ctx->kc_pics[n]; 406 rp->kr_data = set->ks_data + rp->kr_index; 407 *rp->kr_data = rp->kr_preset; 408 } 409 410 return (0); 411 } 412 413 void 414 kcpc_free_configs(kcpc_set_t *set) 415 { 416 int i; 417 418 for (i = 0; i < set->ks_nreqs; i++) 419 if (set->ks_req[i].kr_config != NULL) 420 pcbe_ops->pcbe_free(set->ks_req[i].kr_config); 421 } 422 423 /* 424 * buf points to a user address and the data should be copied out to that 425 * address in the current process. 426 */ 427 int 428 kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick) 429 { 430 kcpc_ctx_t *ctx = set->ks_ctx; 431 int save_spl; 432 433 mutex_enter(&set->ks_lock); 434 if ((set->ks_state & KCPC_SET_BOUND) == 0) { 435 mutex_exit(&set->ks_lock); 436 return (EINVAL); 437 } 438 mutex_exit(&set->ks_lock); 439 440 /* 441 * Kernel preemption must be disabled while reading the hardware regs, 442 * and if this is a CPU-bound context, while checking the CPU binding of 443 * the current thread. 444 */ 445 kpreempt_disable(); 446 save_spl = spl_xcall(); 447 448 if (ctx->kc_flags & KCPC_CTX_INVALID) { 449 splx(save_spl); 450 kpreempt_enable(); 451 return (EAGAIN); 452 } 453 454 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) { 455 if (ctx->kc_cpuid != -1) { 456 if (curthread->t_bind_cpu != ctx->kc_cpuid) { 457 splx(save_spl); 458 kpreempt_enable(); 459 return (EAGAIN); 460 } 461 } 462 463 if (ctx->kc_thread == curthread) { 464 uint64_t curtick = KCPC_GET_TICK(); 465 466 ctx->kc_hrtime = gethrtime_waitfree(); 467 pcbe_ops->pcbe_sample(ctx); 468 ctx->kc_vtick += curtick - ctx->kc_rawtick; 469 ctx->kc_rawtick = curtick; 470 } 471 472 /* 473 * The config may have been invalidated by 474 * the pcbe_sample op. 475 */ 476 if (ctx->kc_flags & KCPC_CTX_INVALID) { 477 splx(save_spl); 478 kpreempt_enable(); 479 return (EAGAIN); 480 } 481 482 } 483 484 splx(save_spl); 485 kpreempt_enable(); 486 487 if (copyout(set->ks_data, buf, 488 set->ks_nreqs * sizeof (uint64_t)) == -1) 489 return (EFAULT); 490 if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1) 491 return (EFAULT); 492 if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1) 493 return (EFAULT); 494 495 return (0); 496 } 497 498 /* 499 * Stop the counters on the CPU this context is bound to. 
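 *
 * Both callers in this file (kcpc_unbind() and kcpc_free()) hold cpu_lock
 * and the target CPU's cpu_cpc_ctxlock across this call; here we only
 * disable preemption while the cpu_t is looked up and the counters stopped.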
500 */ 501 static void 502 kcpc_stop_hw(kcpc_ctx_t *ctx) 503 { 504 cpu_t *cp; 505 506 kpreempt_disable(); 507 508 if (ctx->kc_cpuid == CPU->cpu_id) { 509 cp = CPU; 510 } else { 511 cp = cpu_get(ctx->kc_cpuid); 512 } 513 514 ASSERT(cp != NULL && cp->cpu_cpc_ctx == ctx); 515 kcpc_cpu_stop(cp, B_FALSE); 516 517 kpreempt_enable(); 518 } 519 520 int 521 kcpc_unbind(kcpc_set_t *set) 522 { 523 kcpc_ctx_t *ctx; 524 kthread_t *t; 525 526 /* 527 * We could be racing with the process's agent thread as it 528 * binds the set; we must wait for the set to finish binding 529 * before attempting to tear it down. 530 */ 531 mutex_enter(&set->ks_lock); 532 while ((set->ks_state & KCPC_SET_BOUND) == 0) 533 cv_wait(&set->ks_condv, &set->ks_lock); 534 mutex_exit(&set->ks_lock); 535 536 ctx = set->ks_ctx; 537 538 /* 539 * Use kc_lock to synchronize with kcpc_restore(). 540 */ 541 mutex_enter(&ctx->kc_lock); 542 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 543 mutex_exit(&ctx->kc_lock); 544 545 if (ctx->kc_cpuid == -1) { 546 t = ctx->kc_thread; 547 /* 548 * The context is thread-bound and therefore has a device 549 * context. It will be freed via removectx() calling 550 * freectx() calling kcpc_free(). 551 */ 552 if (t == curthread) { 553 int save_spl; 554 555 kpreempt_disable(); 556 save_spl = spl_xcall(); 557 if (!(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED)) 558 kcpc_unprogram(ctx, B_TRUE); 559 splx(save_spl); 560 kpreempt_enable(); 561 } 562 #ifdef DEBUG 563 if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL, 564 kcpc_lwp_create, NULL, kcpc_free) == 0) 565 panic("kcpc_unbind: context %p not preset on thread %p", 566 (void *)ctx, (void *)t); 567 #else 568 (void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL, 569 kcpc_lwp_create, NULL, kcpc_free); 570 #endif /* DEBUG */ 571 t->t_cpc_set = NULL; 572 t->t_cpc_ctx = NULL; 573 } else { 574 /* 575 * If we are unbinding a CPU-bound set from a remote CPU, the 576 * native CPU's idle thread could be in the midst of programming 577 * this context onto the CPU. We grab the context's lock here to 578 * ensure that the idle thread is done with it. When we release 579 * the lock, the CPU no longer has a context and the idle thread 580 * will move on. 581 * 582 * cpu_lock must be held to prevent the CPU from being DR'd out 583 * while we disassociate the context from the cpu_t. 584 */ 585 cpu_t *cp; 586 mutex_enter(&cpu_lock); 587 cp = cpu_get(ctx->kc_cpuid); 588 if (cp != NULL) { 589 /* 590 * The CPU may have been DR'd out of the system. 
591 */ 592 mutex_enter(&cp->cpu_cpc_ctxlock); 593 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) 594 kcpc_stop_hw(ctx); 595 ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED); 596 mutex_exit(&cp->cpu_cpc_ctxlock); 597 } 598 mutex_exit(&cpu_lock); 599 if (ctx->kc_thread == curthread) { 600 kcpc_free(ctx, 0); 601 curthread->t_cpc_set = NULL; 602 } 603 } 604 605 return (0); 606 } 607 608 int 609 kcpc_preset(kcpc_set_t *set, int index, uint64_t preset) 610 { 611 int i; 612 613 ASSERT(set != NULL); 614 ASSERT(set->ks_state & KCPC_SET_BOUND); 615 ASSERT(set->ks_ctx->kc_thread == curthread); 616 ASSERT(set->ks_ctx->kc_cpuid == -1); 617 618 if (index < 0 || index >= set->ks_nreqs) 619 return (EINVAL); 620 621 for (i = 0; i < set->ks_nreqs; i++) 622 if (set->ks_req[i].kr_index == index) 623 break; 624 ASSERT(i != set->ks_nreqs); 625 626 set->ks_req[i].kr_preset = preset; 627 return (0); 628 } 629 630 int 631 kcpc_restart(kcpc_set_t *set) 632 { 633 kcpc_ctx_t *ctx = set->ks_ctx; 634 int i; 635 int save_spl; 636 637 ASSERT(set->ks_state & KCPC_SET_BOUND); 638 ASSERT(ctx->kc_thread == curthread); 639 ASSERT(ctx->kc_cpuid == -1); 640 641 for (i = 0; i < set->ks_nreqs; i++) { 642 *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset; 643 pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset, 644 0, 0, NULL, &set->ks_req[i].kr_config, NULL); 645 } 646 647 kpreempt_disable(); 648 save_spl = spl_xcall(); 649 650 /* 651 * If the user is doing this on a running set, make sure the counters 652 * are stopped first. 653 */ 654 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) 655 pcbe_ops->pcbe_allstop(); 656 657 /* 658 * Ask the backend to program the hardware. 659 */ 660 ctx->kc_rawtick = KCPC_GET_TICK(); 661 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 662 pcbe_ops->pcbe_program(ctx); 663 splx(save_spl); 664 kpreempt_enable(); 665 666 return (0); 667 } 668 669 /* 670 * Caller must hold kcpc_cpuctx_lock. 671 */ 672 int 673 kcpc_enable(kthread_t *t, int cmd, int enable) 674 { 675 kcpc_ctx_t *ctx = t->t_cpc_ctx; 676 kcpc_set_t *set = t->t_cpc_set; 677 kcpc_set_t *newset; 678 int i; 679 int flag; 680 int err; 681 682 ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock)); 683 684 if (ctx == NULL) { 685 /* 686 * This thread has a set but no context; it must be a 687 * CPU-bound set. 688 */ 689 ASSERT(t->t_cpc_set != NULL); 690 ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1); 691 return (EINVAL); 692 } else if (ctx->kc_flags & KCPC_CTX_INVALID) 693 return (EAGAIN); 694 695 if (cmd == CPC_ENABLE) { 696 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) 697 return (EINVAL); 698 kpreempt_disable(); 699 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 700 kcpc_restore(ctx); 701 kpreempt_enable(); 702 } else if (cmd == CPC_DISABLE) { 703 if (ctx->kc_flags & KCPC_CTX_FREEZE) 704 return (EINVAL); 705 kpreempt_disable(); 706 kcpc_save(ctx); 707 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE); 708 kpreempt_enable(); 709 } else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) { 710 /* 711 * Strategy for usr/sys: stop counters and update set's presets 712 * with current counter values, unbind, update requests with 713 * new config, then re-bind. 714 */ 715 flag = (cmd == CPC_USR_EVENTS) ? 
716 CPC_COUNT_USER: CPC_COUNT_SYSTEM; 717 718 kpreempt_disable(); 719 KCPC_CTX_FLAG_SET(ctx, 720 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED); 721 pcbe_ops->pcbe_allstop(); 722 kpreempt_enable(); 723 724 for (i = 0; i < set->ks_nreqs; i++) { 725 set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data); 726 if (enable) 727 set->ks_req[i].kr_flags |= flag; 728 else 729 set->ks_req[i].kr_flags &= ~flag; 730 } 731 newset = kcpc_dup_set(set); 732 if (kcpc_unbind(set) != 0) 733 return (EINVAL); 734 t->t_cpc_set = newset; 735 if (kcpc_bind_thread(newset, t, &err) != 0) { 736 t->t_cpc_set = NULL; 737 kcpc_free_set(newset); 738 return (EINVAL); 739 } 740 } else 741 return (EINVAL); 742 743 return (0); 744 } 745 746 /* 747 * Provide PCBEs with a way of obtaining the configs of every counter which will 748 * be programmed together. 749 * 750 * If current is NULL, provide the first config. 751 * 752 * If data != NULL, caller wants to know where the data store associated with 753 * the config we return is located. 754 */ 755 void * 756 kcpc_next_config(void *token, void *current, uint64_t **data) 757 { 758 int i; 759 kcpc_pic_t *pic; 760 kcpc_ctx_t *ctx = (kcpc_ctx_t *)token; 761 762 if (current == NULL) { 763 /* 764 * Client would like the first config, which may not be in 765 * counter 0; we need to search through the counters for the 766 * first config. 767 */ 768 for (i = 0; i < cpc_ncounters; i++) 769 if (ctx->kc_pics[i].kp_req != NULL) 770 break; 771 /* 772 * There are no counters configured for the given context. 773 */ 774 if (i == cpc_ncounters) 775 return (NULL); 776 } else { 777 /* 778 * There surely is a faster way to do this. 779 */ 780 for (i = 0; i < cpc_ncounters; i++) { 781 pic = &ctx->kc_pics[i]; 782 783 if (pic->kp_req != NULL && 784 current == pic->kp_req->kr_config) 785 break; 786 } 787 788 /* 789 * We found the current config at picnum i. Now search for the 790 * next configured PIC. 791 */ 792 for (i++; i < cpc_ncounters; i++) { 793 pic = &ctx->kc_pics[i]; 794 if (pic->kp_req != NULL) 795 break; 796 } 797 798 if (i == cpc_ncounters) 799 return (NULL); 800 } 801 802 if (data != NULL) { 803 *data = ctx->kc_pics[i].kp_req->kr_data; 804 } 805 806 return (ctx->kc_pics[i].kp_req->kr_config); 807 } 808 809 810 kcpc_ctx_t * 811 kcpc_ctx_alloc(int kmem_flags) 812 { 813 kcpc_ctx_t *ctx; 814 long hash; 815 816 ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), kmem_flags); 817 if (ctx == NULL) 818 return (NULL); 819 820 hash = CPC_HASH_CTX(ctx); 821 mutex_enter(&kcpc_ctx_llock[hash]); 822 ctx->kc_next = kcpc_ctx_list[hash]; 823 kcpc_ctx_list[hash] = ctx; 824 mutex_exit(&kcpc_ctx_llock[hash]); 825 826 ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) * 827 cpc_ncounters, KM_SLEEP); 828 829 ctx->kc_cpuid = -1; 830 831 return (ctx); 832 } 833 834 /* 835 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT 836 * in the flags. 
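 *
 * This is reached via kcpc_lwp_create() when a thread with an inheritable
 * set creates a new LWP. The child receives its own set, data store,
 * attribute copies and PCBE configurations, so parent and child never
 * share writable counter state.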
837 */ 838 static void 839 kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx) 840 { 841 kcpc_set_t *ks = ctx->kc_set, *cks; 842 int i, j; 843 int code; 844 845 ASSERT(ks != NULL); 846 847 if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0) 848 return; 849 850 cks = kmem_zalloc(sizeof (*cks), KM_SLEEP); 851 cks->ks_state &= ~KCPC_SET_BOUND; 852 cctx->kc_set = cks; 853 cks->ks_flags = ks->ks_flags; 854 cks->ks_nreqs = ks->ks_nreqs; 855 cks->ks_req = kmem_alloc(cks->ks_nreqs * 856 sizeof (kcpc_request_t), KM_SLEEP); 857 cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t), 858 KM_SLEEP); 859 cks->ks_ctx = cctx; 860 861 for (i = 0; i < cks->ks_nreqs; i++) { 862 cks->ks_req[i].kr_index = ks->ks_req[i].kr_index; 863 cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum; 864 (void) strncpy(cks->ks_req[i].kr_event, 865 ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN); 866 cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset; 867 cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags; 868 cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs; 869 if (ks->ks_req[i].kr_nattrs > 0) { 870 cks->ks_req[i].kr_attr = 871 kmem_alloc(ks->ks_req[i].kr_nattrs * 872 sizeof (kcpc_attr_t), KM_SLEEP); 873 } 874 for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) { 875 (void) strncpy(cks->ks_req[i].kr_attr[j].ka_name, 876 ks->ks_req[i].kr_attr[j].ka_name, 877 CPC_MAX_ATTR_LEN); 878 cks->ks_req[i].kr_attr[j].ka_val = 879 ks->ks_req[i].kr_attr[j].ka_val; 880 } 881 } 882 if (kcpc_configure_reqs(cctx, cks, &code) != 0) 883 kcpc_invalidate_config(cctx); 884 885 mutex_enter(&cks->ks_lock); 886 cks->ks_state |= KCPC_SET_BOUND; 887 cv_signal(&cks->ks_condv); 888 mutex_exit(&cks->ks_lock); 889 } 890 891 892 void 893 kcpc_ctx_free(kcpc_ctx_t *ctx) 894 { 895 kcpc_ctx_t **loc; 896 long hash = CPC_HASH_CTX(ctx); 897 898 mutex_enter(&kcpc_ctx_llock[hash]); 899 loc = &kcpc_ctx_list[hash]; 900 ASSERT(*loc != NULL); 901 while (*loc != ctx) 902 loc = &(*loc)->kc_next; 903 *loc = ctx->kc_next; 904 mutex_exit(&kcpc_ctx_llock[hash]); 905 906 kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t)); 907 cv_destroy(&ctx->kc_condv); 908 mutex_destroy(&ctx->kc_lock); 909 kmem_free(ctx, sizeof (*ctx)); 910 } 911 912 /* 913 * Generic interrupt handler used on hardware that generates 914 * overflow interrupts. 915 * 916 * Note: executed at high-level interrupt context! 917 */ 918 /*ARGSUSED*/ 919 kcpc_ctx_t * 920 kcpc_overflow_intr(caddr_t arg, uint64_t bitmap) 921 { 922 kcpc_ctx_t *ctx; 923 kthread_t *t = curthread; 924 int i; 925 926 /* 927 * On both x86 and UltraSPARC, we may deliver the high-level 928 * interrupt in kernel mode, just after we've started to run an 929 * interrupt thread. (That's because the hardware helpfully 930 * delivers the overflow interrupt some random number of cycles 931 * after the instruction that caused the overflow by which time 932 * we're in some part of the kernel, not necessarily running on 933 * the right thread). 934 * 935 * Check for this case here -- find the pinned thread 936 * that was running when the interrupt went off. 937 */ 938 if (t->t_flag & T_INTR_THREAD) { 939 klwp_t *lwp; 940 941 atomic_inc_32(&kcpc_intrctx_count); 942 943 /* 944 * Note that t_lwp is always set to point at the underlying 945 * thread, thus this will work in the presence of nested 946 * interrupts. 
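		 * In other words, even with several interrupt threads
		 * stacked on top of one another, lwptot(t->t_lwp) below
		 * still resolves to the pinned LWP whose CPC context we
		 * are after.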
947 */ 948 ctx = NULL; 949 if ((lwp = t->t_lwp) != NULL) { 950 t = lwptot(lwp); 951 ctx = t->t_cpc_ctx; 952 } 953 } else 954 ctx = t->t_cpc_ctx; 955 956 if (ctx == NULL) { 957 /* 958 * This can easily happen if we're using the counters in 959 * "shared" mode, for example, and an overflow interrupt 960 * occurs while we are running cpustat. In that case, the 961 * bound thread that has the context that belongs to this 962 * CPU is almost certainly sleeping (if it was running on 963 * the CPU we'd have found it above), and the actual 964 * interrupted thread has no knowledge of performance counters! 965 */ 966 ctx = curthread->t_cpu->cpu_cpc_ctx; 967 if (ctx != NULL) { 968 /* 969 * Return the bound context for this CPU to 970 * the interrupt handler so that it can synchronously 971 * sample the hardware counters and restart them. 972 */ 973 return (ctx); 974 } 975 976 /* 977 * As long as the overflow interrupt really is delivered early 978 * enough after trapping into the kernel to avoid switching 979 * threads, we must always be able to find the cpc context, 980 * or something went terribly wrong i.e. we ended up 981 * running a passivated interrupt thread, a kernel 982 * thread or we interrupted idle, all of which are Very Bad. 983 * 984 * We also could end up here owing to an incredibly unlikely 985 * race condition that exists on x86 based architectures when 986 * the cpc provider is in use; overflow interrupts are directed 987 * to the cpc provider if the 'dtrace_cpc_in_use' variable is 988 * set when we enter the handler. This variable is unset after 989 * overflow interrupts have been disabled on all CPUs and all 990 * contexts have been torn down. To stop interrupts, the cpc 991 * provider issues a xcall to the remote CPU before it tears 992 * down that CPUs context. As high priority xcalls, on an x86 993 * architecture, execute at a higher PIL than this handler, it 994 * is possible (though extremely unlikely) that the xcall could 995 * interrupt the overflow handler before the handler has 996 * checked the 'dtrace_cpc_in_use' variable, stop the counters, 997 * return to the cpc provider which could then rip down 998 * contexts and unset 'dtrace_cpc_in_use' *before* the CPUs 999 * overflow handler has had a chance to check the variable. In 1000 * that case, the handler would direct the overflow into this 1001 * code and no valid context will be found. The default behavior 1002 * when no valid context is found is now to shout a warning to 1003 * the console and bump the 'kcpc_nullctx_count' variable. 1004 */ 1005 if (kcpc_nullctx_panic) 1006 panic("null cpc context, thread %p", (void *)t); 1007 #ifdef DEBUG 1008 cmn_err(CE_NOTE, 1009 "null cpc context found in overflow handler!\n"); 1010 #endif 1011 atomic_inc_32(&kcpc_nullctx_count); 1012 } else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) { 1013 /* 1014 * Schedule an ast to sample the counters, which will 1015 * propagate any overflow into the virtualized performance 1016 * counter(s), and may deliver a signal. 1017 */ 1018 ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW; 1019 /* 1020 * If a counter has overflowed which was counting on behalf of 1021 * a request which specified CPC_OVF_NOTIFY_EMT, send the 1022 * process a signal. 1023 */ 1024 for (i = 0; i < cpc_ncounters; i++) { 1025 if (ctx->kc_pics[i].kp_req != NULL && 1026 bitmap & (1 << i) && 1027 ctx->kc_pics[i].kp_req->kr_flags & 1028 CPC_OVF_NOTIFY_EMT) { 1029 /* 1030 * A signal has been requested for this PIC, so 1031 * so freeze the context. 
				 * The interrupt handler has already stopped
				 * the counter hardware.
				 */
				KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
				atomic_or_uint(&ctx->kc_pics[i].kp_flags,
				    KCPC_PIC_OVERFLOWED);
			}
		}
		aston(t);
	} else if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
		/*
		 * Thread context is no longer valid, but there may be a valid
		 * CPU context.
		 */
		return (curthread->t_cpu->cpu_cpc_ctx);
	}

	return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
	kcpc_ctx_t *ctx;
	uint64_t bitmap;
	uint8_t *state;
	int	save_spl;

	if (pcbe_ops == NULL ||
	    (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * Prevent any further interrupts.
	 */
	pcbe_ops->pcbe_allstop();

	if (dtrace_cpc_in_use) {
		state = &cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state;

		/*
		 * Set the per-CPU state bit to indicate that we are currently
		 * processing an interrupt if it is currently free. Drop the
		 * interrupt if the state isn't free (i.e. a configuration
		 * event is taking place).
		 */
		if (atomic_cas_8(state, DCPC_INTR_FREE,
		    DCPC_INTR_PROCESSING) == DCPC_INTR_FREE) {
			int i;
			kcpc_request_t req;

			ASSERT(dtrace_cpc_fire != NULL);

			(*dtrace_cpc_fire)(bitmap);

			ctx = curthread->t_cpu->cpu_cpc_ctx;
			if (ctx == NULL) {
#ifdef DEBUG
				cmn_err(CE_NOTE, "null cpc context in "
				    "hardware overflow handler!\n");
#endif
				return (DDI_INTR_CLAIMED);
			}

			/* Reset any counters that have overflowed */
			for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
				req = ctx->kc_set->ks_req[i];

				if (bitmap & (1 << req.kr_picnum)) {
					pcbe_ops->pcbe_configure(req.kr_picnum,
					    req.kr_event, req.kr_preset,
					    req.kr_flags, req.kr_nattrs,
					    req.kr_attr, &(req.kr_config),
					    (void *)ctx);
				}
			}
			pcbe_ops->pcbe_program(ctx);

			/*
			 * We've finished processing the interrupt so set
			 * the state back to free.
			 */
			cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state =
			    DCPC_INTR_FREE;
			membar_producer();
		}
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * DTrace isn't involved so pass on accordingly.
	 *
	 * If the interrupt has occurred in the context of an lwp owning
	 * the counters, then the handler posts an AST to the lwp to
	 * trigger the actual sampling, and optionally deliver a signal or
	 * restart the counters, on the way out of the kernel using
	 * kcpc_overflow_ast() (see below).
	 *
	 * On the other hand, if the handler returns the context to us
	 * directly, then it means that there are no other threads in
	 * the middle of updating it, no AST has been posted, and so we
	 * should sample the counters here, and restart them with no
	 * further fuss.
	 *
	 * The CPU's CPC context may disappear as a result of a cross-call,
	 * which has a higher PIL on x86, so protect the context by raising
	 * PIL to the cross-call level.
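	 *
	 * The rest of this file uses the same discipline whenever it touches
	 * the hardware or cpu_cpc_ctx from thread context:
	 *
	 *	kpreempt_disable();
	 *	save_spl = spl_xcall();
	 *	... program / sample / unprogram ...
	 *	splx(save_spl);
	 *	kpreempt_enable();
	 *
	 * Since we are already running at high interrupt level here, raising
	 * to the cross-call level with spl_xcall() is all that is required.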
1143 */ 1144 save_spl = spl_xcall(); 1145 if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) { 1146 uint64_t curtick = KCPC_GET_TICK(); 1147 1148 ctx->kc_hrtime = gethrtime_waitfree(); 1149 ctx->kc_vtick += curtick - ctx->kc_rawtick; 1150 ctx->kc_rawtick = curtick; 1151 pcbe_ops->pcbe_sample(ctx); 1152 pcbe_ops->pcbe_program(ctx); 1153 } 1154 splx(save_spl); 1155 1156 return (DDI_INTR_CLAIMED); 1157 } 1158 1159 /* 1160 * Called from trap() when processing the ast posted by the high-level 1161 * interrupt handler. 1162 */ 1163 int 1164 kcpc_overflow_ast() 1165 { 1166 kcpc_ctx_t *ctx = curthread->t_cpc_ctx; 1167 int i; 1168 int found = 0; 1169 uint64_t curtick = KCPC_GET_TICK(); 1170 1171 ASSERT(ctx != NULL); /* Beware of interrupt skid. */ 1172 1173 /* 1174 * An overflow happened: sample the context to ensure that 1175 * the overflow is propagated into the upper bits of the 1176 * virtualized 64-bit counter(s). 1177 */ 1178 kpreempt_disable(); 1179 ctx->kc_hrtime = gethrtime_waitfree(); 1180 pcbe_ops->pcbe_sample(ctx); 1181 kpreempt_enable(); 1182 1183 ctx->kc_vtick += curtick - ctx->kc_rawtick; 1184 1185 /* 1186 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED 1187 * if that pic generated an overflow and if the request it was counting 1188 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all 1189 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we 1190 * found any overflowed pics, keep the context frozen and return true 1191 * (thus causing a signal to be sent). 1192 */ 1193 for (i = 0; i < cpc_ncounters; i++) { 1194 if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) { 1195 atomic_and_uint(&ctx->kc_pics[i].kp_flags, 1196 ~KCPC_PIC_OVERFLOWED); 1197 found = 1; 1198 } 1199 } 1200 if (found) 1201 return (1); 1202 1203 /* 1204 * Otherwise, re-enable the counters and continue life as before. 1205 */ 1206 kpreempt_disable(); 1207 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 1208 pcbe_ops->pcbe_program(ctx); 1209 kpreempt_enable(); 1210 return (0); 1211 } 1212 1213 /* 1214 * Called when switching away from current thread. 1215 */ 1216 static void 1217 kcpc_save(kcpc_ctx_t *ctx) 1218 { 1219 int err; 1220 int save_spl; 1221 1222 kpreempt_disable(); 1223 save_spl = spl_xcall(); 1224 1225 if (ctx->kc_flags & KCPC_CTX_INVALID) { 1226 if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) { 1227 splx(save_spl); 1228 kpreempt_enable(); 1229 return; 1230 } 1231 /* 1232 * This context has been invalidated but the counters have not 1233 * been stopped. Stop them here and mark the context stopped. 1234 */ 1235 kcpc_unprogram(ctx, B_TRUE); 1236 splx(save_spl); 1237 kpreempt_enable(); 1238 return; 1239 } 1240 1241 pcbe_ops->pcbe_allstop(); 1242 if (ctx->kc_flags & KCPC_CTX_FREEZE) { 1243 splx(save_spl); 1244 kpreempt_enable(); 1245 return; 1246 } 1247 1248 /* 1249 * Need to sample for all reqs into each req's current mpic. 
	 */
	ctx->kc_hrtime = gethrtime_waitfree();
	ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick;
	pcbe_ops->pcbe_sample(ctx);

	/*
	 * Program counter for measuring capacity and utilization since user
	 * thread isn't using counter anymore
	 */
	ASSERT(ctx->kc_cpuid == -1);
	cu_cpc_program(CPU, &err);
	splx(save_spl);
	kpreempt_enable();
}

static void
kcpc_restore(kcpc_ctx_t *ctx)
{
	int save_spl;

	mutex_enter(&ctx->kc_lock);

	if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) ==
	    KCPC_CTX_INVALID) {
		/*
		 * The context is invalidated but has not been marked stopped.
		 * We mark it as such here because we will not start the
		 * counters during this context switch.
		 */
		KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);
	}

	if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) {
		mutex_exit(&ctx->kc_lock);
		return;
	}

	/*
	 * Set kc_flags to show that a kcpc_restore() is in progress to avoid
	 * ctx & set related memory objects being freed without us knowing.
	 * This can happen if an agent thread is executing a kcpc_unbind(),
	 * with this thread as the target, whilst we're concurrently doing a
	 * restorectx() during, for example, a proc_exit(). Effectively, by
	 * doing this, we're asking kcpc_free() to cv_wait() until
	 * kcpc_restore() has completed.
	 */
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_RESTORE);
	mutex_exit(&ctx->kc_lock);

	/*
	 * While programming the hardware, the counters should be stopped. We
	 * don't do an explicit pcbe_allstop() here because they should have
	 * been stopped already by the last consumer.
	 */
	kpreempt_disable();
	save_spl = spl_xcall();
	kcpc_program(ctx, B_TRUE, B_TRUE);
	splx(save_spl);
	kpreempt_enable();

	/*
	 * Wake the agent thread if it's waiting in kcpc_free().
	 */
	mutex_enter(&ctx->kc_lock);
	KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_RESTORE);
	cv_signal(&ctx->kc_condv);
	mutex_exit(&ctx->kc_lock);
}

/*
 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the
 * following context operators to the idle thread on each CPU. They stop the
 * counters when the idle thread is switched on, and they start them again when
 * it is switched off.
 */
/*ARGSUSED*/
void
kcpc_idle_save(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
	 */
	mutex_enter(&cp->cpu_cpc_ctxlock);

	if ((cp->cpu_cpc_ctx == NULL) ||
	    (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) {
		mutex_exit(&cp->cpu_cpc_ctxlock);
		return;
	}

	pcbe_ops->pcbe_program(cp->cpu_cpc_ctx);
	mutex_exit(&cp->cpu_cpc_ctxlock);
}

void
kcpc_idle_restore(struct cpu *cp)
{
	/*
	 * The idle thread shouldn't be run anywhere else.
	 */
	ASSERT(CPU == cp);

	/*
	 * We must hold the CPU's context lock to ensure the context isn't freed
	 * while we're looking at it.
1361 */ 1362 mutex_enter(&cp->cpu_cpc_ctxlock); 1363 1364 if ((cp->cpu_cpc_ctx == NULL) || 1365 (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) { 1366 mutex_exit(&cp->cpu_cpc_ctxlock); 1367 return; 1368 } 1369 1370 pcbe_ops->pcbe_allstop(); 1371 mutex_exit(&cp->cpu_cpc_ctxlock); 1372 } 1373 1374 /*ARGSUSED*/ 1375 static void 1376 kcpc_lwp_create(kthread_t *t, kthread_t *ct) 1377 { 1378 kcpc_ctx_t *ctx = t->t_cpc_ctx, *cctx; 1379 int i; 1380 1381 if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0) 1382 return; 1383 1384 rw_enter(&kcpc_cpuctx_lock, RW_READER); 1385 if (ctx->kc_flags & KCPC_CTX_INVALID) { 1386 rw_exit(&kcpc_cpuctx_lock); 1387 return; 1388 } 1389 cctx = kcpc_ctx_alloc(KM_SLEEP); 1390 kcpc_ctx_clone(ctx, cctx); 1391 rw_exit(&kcpc_cpuctx_lock); 1392 1393 /* 1394 * Copy the parent context's kc_flags field, but don't overwrite 1395 * the child's in case it was modified during kcpc_ctx_clone. 1396 */ 1397 KCPC_CTX_FLAG_SET(cctx, ctx->kc_flags); 1398 cctx->kc_thread = ct; 1399 cctx->kc_cpuid = -1; 1400 ct->t_cpc_set = cctx->kc_set; 1401 ct->t_cpc_ctx = cctx; 1402 1403 if (cctx->kc_flags & KCPC_CTX_SIGOVF) { 1404 kcpc_set_t *ks = cctx->kc_set; 1405 /* 1406 * Our contract with the user requires us to immediately send an 1407 * overflow signal to all children if we have the LWPINHERIT 1408 * and SIGOVF flags set. In addition, all counters should be 1409 * set to UINT64_MAX, and their pic's overflow flag turned on 1410 * so that our trap() processing knows to send a signal. 1411 */ 1412 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE); 1413 for (i = 0; i < ks->ks_nreqs; i++) { 1414 kcpc_request_t *kr = &ks->ks_req[i]; 1415 1416 if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) { 1417 *(kr->kr_data) = UINT64_MAX; 1418 atomic_or_uint(&kr->kr_picp->kp_flags, 1419 KCPC_PIC_OVERFLOWED); 1420 } 1421 } 1422 ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW; 1423 aston(ct); 1424 } 1425 1426 installctx(ct, cctx, kcpc_save, kcpc_restore, 1427 NULL, kcpc_lwp_create, NULL, kcpc_free); 1428 } 1429 1430 /* 1431 * Counter Stoppage Theory 1432 * 1433 * The counters may need to be stopped properly at the following occasions: 1434 * 1435 * 1) An LWP exits. 1436 * 2) A thread exits. 1437 * 3) An LWP performs an exec(). 1438 * 4) A bound set is unbound. 1439 * 1440 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need 1441 * to be freed as well. 1442 * 1443 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on 1444 * when the thread is freed, kcpc_free(), called by freectx(), frees the 1445 * context. 1446 * 1447 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit(). 1448 * 1449 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has 1450 * been called from exec. It stops the counters _and_ frees the context. 1451 * 1452 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context. 1453 * 1454 * CPU-bound counters are always stopped via kcpc_unbind(). 1455 */ 1456 1457 /* 1458 * We're being called to delete the context; we ensure that all associated data 1459 * structures are freed, and that the hardware is passivated if this is an exec. 1460 */ 1461 1462 /*ARGSUSED*/ 1463 void 1464 kcpc_free(kcpc_ctx_t *ctx, int isexec) 1465 { 1466 int i; 1467 kcpc_set_t *set = ctx->kc_set; 1468 1469 ASSERT(set != NULL); 1470 1471 /* 1472 * Wait for kcpc_restore() to finish before we tear things down. 
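	 * (kcpc_restore() sets KCPC_CTX_RESTORE around reprogramming the
	 * hardware and signals kc_condv under kc_lock when it clears the
	 * flag, so the wait below cannot miss the wakeup.)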
1473 */ 1474 mutex_enter(&ctx->kc_lock); 1475 while (ctx->kc_flags & KCPC_CTX_RESTORE) 1476 cv_wait(&ctx->kc_condv, &ctx->kc_lock); 1477 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1478 mutex_exit(&ctx->kc_lock); 1479 1480 if (isexec) { 1481 /* 1482 * This thread is execing, and after the exec it should not have 1483 * any performance counter context. Stop the counters properly 1484 * here so the system isn't surprised by an overflow interrupt 1485 * later. 1486 */ 1487 if (ctx->kc_cpuid != -1) { 1488 cpu_t *cp; 1489 /* 1490 * CPU-bound context; stop the appropriate CPU's ctrs. 1491 * Hold cpu_lock while examining the CPU to ensure it 1492 * doesn't go away. 1493 */ 1494 mutex_enter(&cpu_lock); 1495 cp = cpu_get(ctx->kc_cpuid); 1496 /* 1497 * The CPU could have been DR'd out, so only stop the 1498 * CPU and clear its context pointer if the CPU still 1499 * exists. 1500 */ 1501 if (cp != NULL) { 1502 mutex_enter(&cp->cpu_cpc_ctxlock); 1503 kcpc_stop_hw(ctx); 1504 mutex_exit(&cp->cpu_cpc_ctxlock); 1505 } 1506 mutex_exit(&cpu_lock); 1507 ASSERT(curthread->t_cpc_ctx == NULL); 1508 } else { 1509 int save_spl; 1510 1511 /* 1512 * Thread-bound context; stop _this_ CPU's counters. 1513 */ 1514 kpreempt_disable(); 1515 save_spl = spl_xcall(); 1516 kcpc_unprogram(ctx, B_TRUE); 1517 curthread->t_cpc_ctx = NULL; 1518 splx(save_spl); 1519 kpreempt_enable(); 1520 } 1521 1522 /* 1523 * Since we are being called from an exec and we know that 1524 * exec is not permitted via the agent thread, we should clean 1525 * up this thread's CPC state completely, and not leave dangling 1526 * CPC pointers behind. 1527 */ 1528 ASSERT(ctx->kc_thread == curthread); 1529 curthread->t_cpc_set = NULL; 1530 } 1531 1532 /* 1533 * Walk through each request in this context's set and free the PCBE's 1534 * configuration if it exists. 1535 */ 1536 for (i = 0; i < set->ks_nreqs; i++) { 1537 if (set->ks_req[i].kr_config != NULL) 1538 pcbe_ops->pcbe_free(set->ks_req[i].kr_config); 1539 } 1540 1541 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 1542 kcpc_ctx_free(ctx); 1543 kcpc_free_set(set); 1544 } 1545 1546 /* 1547 * Free the memory associated with a request set. 1548 */ 1549 void 1550 kcpc_free_set(kcpc_set_t *set) 1551 { 1552 int i; 1553 kcpc_request_t *req; 1554 1555 ASSERT(set->ks_req != NULL); 1556 1557 for (i = 0; i < set->ks_nreqs; i++) { 1558 req = &set->ks_req[i]; 1559 1560 if (req->kr_nattrs != 0) { 1561 kmem_free(req->kr_attr, 1562 req->kr_nattrs * sizeof (kcpc_attr_t)); 1563 } 1564 } 1565 1566 kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs); 1567 cv_destroy(&set->ks_condv); 1568 mutex_destroy(&set->ks_lock); 1569 kmem_free(set, sizeof (kcpc_set_t)); 1570 } 1571 1572 /* 1573 * Grab every existing context and mark it as invalid. 1574 */ 1575 void 1576 kcpc_invalidate_all(void) 1577 { 1578 kcpc_ctx_t *ctx; 1579 long hash; 1580 1581 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) { 1582 mutex_enter(&kcpc_ctx_llock[hash]); 1583 for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next) 1584 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1585 mutex_exit(&kcpc_ctx_llock[hash]); 1586 } 1587 } 1588 1589 /* 1590 * Interface for PCBEs to signal that an existing configuration has suddenly 1591 * become invalid. 
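 *
 * The token is the opaque pointer the framework handed to pcbe_configure()
 * (in this file, the owning kcpc_ctx_t). A hypothetical PCBE that loses
 * access to its counters could simply call
 *
 *	kcpc_invalidate_config(token);
 *
 * after which the next sample or context switch notices KCPC_CTX_INVALID
 * and backs out.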
1592 */ 1593 void 1594 kcpc_invalidate_config(void *token) 1595 { 1596 kcpc_ctx_t *ctx = token; 1597 1598 ASSERT(ctx != NULL); 1599 1600 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1601 } 1602 1603 /* 1604 * Called from lwp_exit() and thread_exit() 1605 */ 1606 void 1607 kcpc_passivate(void) 1608 { 1609 kcpc_ctx_t *ctx = curthread->t_cpc_ctx; 1610 kcpc_set_t *set = curthread->t_cpc_set; 1611 int save_spl; 1612 1613 if (set == NULL) 1614 return; 1615 1616 if (ctx == NULL) { 1617 /* 1618 * This thread has a set but no context; it must be a CPU-bound 1619 * set. The hardware will be stopped via kcpc_unbind() when the 1620 * process exits and closes its file descriptors with 1621 * kcpc_close(). Our only job here is to clean up this thread's 1622 * state; the set will be freed with the unbind(). 1623 */ 1624 (void) kcpc_unbind(set); 1625 /* 1626 * Unbinding a set belonging to the current thread should clear 1627 * its set pointer. 1628 */ 1629 ASSERT(curthread->t_cpc_set == NULL); 1630 return; 1631 } 1632 1633 kpreempt_disable(); 1634 save_spl = spl_xcall(); 1635 curthread->t_cpc_set = NULL; 1636 1637 /* 1638 * This thread/LWP is exiting but context switches will continue to 1639 * happen for a bit as the exit proceeds. Kernel preemption must be 1640 * disabled here to prevent a race between checking or setting the 1641 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during 1642 * a context switch. 1643 */ 1644 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) { 1645 kcpc_unprogram(ctx, B_TRUE); 1646 KCPC_CTX_FLAG_SET(ctx, 1647 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED); 1648 } 1649 1650 /* 1651 * We're cleaning up after this thread; ensure there are no dangling 1652 * CPC pointers left behind. The context and set will be freed by 1653 * freectx(). 1654 */ 1655 curthread->t_cpc_ctx = NULL; 1656 1657 splx(save_spl); 1658 kpreempt_enable(); 1659 } 1660 1661 /* 1662 * Assign the requests in the given set to the PICs in the context. 1663 * Returns 0 if successful, -1 on failure. 1664 */ 1665 /*ARGSUSED*/ 1666 int 1667 kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx) 1668 { 1669 int i; 1670 int *picnum_save; 1671 1672 ASSERT(set->ks_nreqs <= cpc_ncounters); 1673 1674 /* 1675 * Provide kcpc_tryassign() with scratch space to avoid doing an 1676 * alloc/free with every invocation. 1677 */ 1678 picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP); 1679 /* 1680 * kcpc_tryassign() blindly walks through each request in the set, 1681 * seeing if a counter can count its event. If yes, it assigns that 1682 * counter. However, that counter may have been the only capable counter 1683 * for _another_ request's event. The solution is to try every possible 1684 * request first. Note that this does not cover all solutions, as 1685 * that would require all unique orderings of requests, an n^n operation 1686 * which would be unacceptable for architectures with many counters. 1687 */ 1688 for (i = 0; i < set->ks_nreqs; i++) 1689 if (kcpc_tryassign(set, i, picnum_save) == 0) 1690 break; 1691 1692 kmem_free(picnum_save, set->ks_nreqs * sizeof (int)); 1693 if (i == set->ks_nreqs) 1694 return (-1); 1695 return (0); 1696 } 1697 1698 static int 1699 kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch) 1700 { 1701 int i; 1702 int j; 1703 uint64_t bitmap = 0, resmap = 0; 1704 uint64_t ctrmap; 1705 1706 /* 1707 * We are attempting to assign the reqs to pics, but we may fail. 
If we 1708 * fail, we need to restore the state of the requests to what it was 1709 * when we found it, as some reqs may have been explicitly assigned to 1710 * a specific PIC beforehand. We do this by snapshotting the assignments 1711 * now and restoring from it later if we fail. 1712 * 1713 * Also we note here which counters have already been claimed by 1714 * requests with explicit counter assignments. 1715 */ 1716 for (i = 0; i < set->ks_nreqs; i++) { 1717 scratch[i] = set->ks_req[i].kr_picnum; 1718 if (set->ks_req[i].kr_picnum != -1) 1719 resmap |= (1 << set->ks_req[i].kr_picnum); 1720 } 1721 1722 /* 1723 * Walk through requests assigning them to the first PIC that is 1724 * capable. 1725 */ 1726 i = starting_req; 1727 do { 1728 if (set->ks_req[i].kr_picnum != -1) { 1729 ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0); 1730 bitmap |= (1 << set->ks_req[i].kr_picnum); 1731 if (++i == set->ks_nreqs) 1732 i = 0; 1733 continue; 1734 } 1735 1736 ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event); 1737 for (j = 0; j < cpc_ncounters; j++) { 1738 if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 && 1739 (resmap & (1 << j)) == 0) { 1740 /* 1741 * We can assign this counter because: 1742 * 1743 * 1. It can count the event (ctrmap) 1744 * 2. It hasn't been assigned yet (bitmap) 1745 * 3. It wasn't reserved by a request (resmap) 1746 */ 1747 bitmap |= (1 << j); 1748 break; 1749 } 1750 } 1751 if (j == cpc_ncounters) { 1752 for (i = 0; i < set->ks_nreqs; i++) 1753 set->ks_req[i].kr_picnum = scratch[i]; 1754 return (-1); 1755 } 1756 set->ks_req[i].kr_picnum = j; 1757 1758 if (++i == set->ks_nreqs) 1759 i = 0; 1760 } while (i != starting_req); 1761 1762 return (0); 1763 } 1764 1765 kcpc_set_t * 1766 kcpc_dup_set(kcpc_set_t *set) 1767 { 1768 kcpc_set_t *new; 1769 int i; 1770 int j; 1771 1772 new = kmem_zalloc(sizeof (*new), KM_SLEEP); 1773 new->ks_state &= ~KCPC_SET_BOUND; 1774 new->ks_flags = set->ks_flags; 1775 new->ks_nreqs = set->ks_nreqs; 1776 new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t), 1777 KM_SLEEP); 1778 new->ks_data = NULL; 1779 new->ks_ctx = NULL; 1780 1781 for (i = 0; i < new->ks_nreqs; i++) { 1782 new->ks_req[i].kr_config = NULL; 1783 new->ks_req[i].kr_index = set->ks_req[i].kr_index; 1784 new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum; 1785 new->ks_req[i].kr_picp = NULL; 1786 new->ks_req[i].kr_data = NULL; 1787 (void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event, 1788 CPC_MAX_EVENT_LEN); 1789 new->ks_req[i].kr_preset = set->ks_req[i].kr_preset; 1790 new->ks_req[i].kr_flags = set->ks_req[i].kr_flags; 1791 new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs; 1792 new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs * 1793 sizeof (kcpc_attr_t), KM_SLEEP); 1794 for (j = 0; j < new->ks_req[i].kr_nattrs; j++) { 1795 new->ks_req[i].kr_attr[j].ka_val = 1796 set->ks_req[i].kr_attr[j].ka_val; 1797 (void) strncpy(new->ks_req[i].kr_attr[j].ka_name, 1798 set->ks_req[i].kr_attr[j].ka_name, 1799 CPC_MAX_ATTR_LEN); 1800 } 1801 } 1802 1803 return (new); 1804 } 1805 1806 int 1807 kcpc_allow_nonpriv(void *token) 1808 { 1809 return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV); 1810 } 1811 1812 void 1813 kcpc_invalidate(kthread_t *t) 1814 { 1815 kcpc_ctx_t *ctx = t->t_cpc_ctx; 1816 1817 if (ctx != NULL) 1818 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1819 } 1820 1821 /* 1822 * Given a PCBE ID, attempt to load a matching PCBE module. 
 * The strings given are used to construct PCBE names, starting with the most
 * specific, "pcbe.first.second.third.fourth", and ending with the least
 * specific, "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
	uint_t s[3];

	s[0] = first;
	s[1] = second;
	s[2] = third;

	return (modload_qualified("pcbe",
	    "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
}

/*
 * Create one or more CPC contexts for the given CPU with the specified counter
 * event requests.
 *
 * If the number of requested counter events is less than or equal to the
 * number of hardware counters on a CPU and they can all be assigned to the
 * counters on a CPU at the same time, then one CPC context is made.
 *
 * Otherwise, multiple CPC contexts are created to allow multiplexing more
 * counter events than existing counters onto the counters by iterating through
 * all of the CPC contexts, programming the counters with each CPC context one
 * at a time and measuring the resulting counter values. Each of the resulting
 * CPC contexts contains some number of requested counter events less than or
 * equal to the number of counters on a CPU, depending on whether all the
 * counter events can be programmed on all the counters at the same time or
 * not.
 *
 * Flags to kmem_{,z}alloc() are passed in as an argument to allow specifying
 * whether memory allocation should be non-blocking or not. The code will try
 * to allocate *whole* CPC contexts if possible. If there is any memory
 * allocation failure during the allocations needed for a given CPC context, it
 * will skip allocating that CPC context because it cannot allocate the whole
 * thing. Thus, the only time that it will end up allocating none (ie. no CPC
 * contexts whatsoever) is when it cannot even allocate *one* whole CPC context
 * without a memory allocation failure occurring.
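 *
 * A sketch of how a caller might use this routine (the request list and
 * the programming loop are illustrative, not taken from this file):
 *
 *	kcpc_ctx_t **ctxs;
 *	size_t sz;
 *	int nctx = kcpc_cpu_ctx_create(cp, reqs, KM_NOSLEEP, &ctxs, &sz);
 *
 *	if (nctx > 0) {
 *		... program ctxs[0] .. ctxs[nctx - 1] one at a time,
 *		    sampling between switches, to multiplex the events ...
 *	}
 *	...
 *	kmem_free(ctxs, sz);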
1866 */ 1867 int 1868 kcpc_cpu_ctx_create(cpu_t *cp, kcpc_request_list_t *req_list, int kmem_flags, 1869 kcpc_ctx_t ***ctx_ptr_array, size_t *ctx_ptr_array_sz) 1870 { 1871 kcpc_ctx_t **ctx_ptrs; 1872 int nctx; 1873 int nctx_ptrs; 1874 int nreqs; 1875 kcpc_request_t *reqs; 1876 1877 if (cp == NULL || ctx_ptr_array == NULL || ctx_ptr_array_sz == NULL || 1878 req_list == NULL || req_list->krl_cnt < 1) 1879 return (-1); 1880 1881 /* 1882 * Allocate number of sets assuming that each set contains one and only 1883 * one counter event request for each counter on a CPU 1884 */ 1885 nreqs = req_list->krl_cnt; 1886 nctx_ptrs = (nreqs + cpc_ncounters - 1) / cpc_ncounters; 1887 ctx_ptrs = kmem_zalloc(nctx_ptrs * sizeof (kcpc_ctx_t *), kmem_flags); 1888 if (ctx_ptrs == NULL) 1889 return (-2); 1890 1891 /* 1892 * Fill in sets of requests 1893 */ 1894 nctx = 0; 1895 reqs = req_list->krl_list; 1896 while (nreqs > 0) { 1897 kcpc_ctx_t *ctx; 1898 kcpc_set_t *set; 1899 int subcode; 1900 1901 /* 1902 * Allocate CPC context and set for requested counter events 1903 */ 1904 ctx = kcpc_ctx_alloc(kmem_flags); 1905 set = kcpc_set_create(reqs, nreqs, 0, kmem_flags); 1906 if (set == NULL) { 1907 kcpc_ctx_free(ctx); 1908 break; 1909 } 1910 1911 /* 1912 * Determine assignment of requested counter events to specific 1913 * counters 1914 */ 1915 if (kcpc_assign_reqs(set, ctx) != 0) { 1916 /* 1917 * May not be able to assign requested counter events 1918 * to all counters since all counters may not be able 1919 * to do all events, so only do one counter event in 1920 * set of counter requests when this happens since at 1921 * least one of the counters must be able to do the 1922 * event. 1923 */ 1924 kcpc_free_set(set); 1925 set = kcpc_set_create(reqs, 1, 0, kmem_flags); 1926 if (set == NULL) { 1927 kcpc_ctx_free(ctx); 1928 break; 1929 } 1930 if (kcpc_assign_reqs(set, ctx) != 0) { 1931 #ifdef DEBUG 1932 cmn_err(CE_NOTE, "!kcpc_cpu_ctx_create: can't " 1933 "assign counter event %s!\n", 1934 set->ks_req->kr_event); 1935 #endif 1936 kcpc_free_set(set); 1937 kcpc_ctx_free(ctx); 1938 reqs++; 1939 nreqs--; 1940 continue; 1941 } 1942 } 1943 1944 /* 1945 * Allocate memory needed to hold requested counter event data 1946 */ 1947 set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), 1948 kmem_flags); 1949 if (set->ks_data == NULL) { 1950 kcpc_free_set(set); 1951 kcpc_ctx_free(ctx); 1952 break; 1953 } 1954 1955 /* 1956 * Configure requested counter events 1957 */ 1958 if (kcpc_configure_reqs(ctx, set, &subcode) != 0) { 1959 #ifdef DEBUG 1960 cmn_err(CE_NOTE, 1961 "!kcpc_cpu_ctx_create: can't configure " 1962 "set of counter event requests!\n"); 1963 #endif 1964 reqs += set->ks_nreqs; 1965 nreqs -= set->ks_nreqs; 1966 kmem_free(set->ks_data, 1967 set->ks_nreqs * sizeof (uint64_t)); 1968 kcpc_free_set(set); 1969 kcpc_ctx_free(ctx); 1970 continue; 1971 } 1972 1973 /* 1974 * Point set of counter event requests at this context and fill 1975 * in CPC context 1976 */ 1977 set->ks_ctx = ctx; 1978 ctx->kc_set = set; 1979 ctx->kc_cpuid = cp->cpu_id; 1980 ctx->kc_thread = curthread; 1981 1982 ctx_ptrs[nctx] = ctx; 1983 1984 /* 1985 * Update requests and how many are left to be assigned to sets 1986 */ 1987 reqs += set->ks_nreqs; 1988 nreqs -= set->ks_nreqs; 1989 1990 /* 1991 * Increment number of CPC contexts and allocate bigger array 1992 * for context pointers as needed 1993 */ 1994 nctx++; 1995 if (nctx >= nctx_ptrs) { 1996 kcpc_ctx_t **new; 1997 int new_cnt; 1998 1999 /* 2000 * Allocate more CPC contexts based on how many 2001 * 
/*
 * Return whether PCBE supports given counter event
 */
boolean_t
kcpc_event_supported(char *event)
{
	if (pcbe_ops == NULL || pcbe_ops->pcbe_event_coverage(event) == 0)
		return (B_FALSE);

	return (B_TRUE);
}

/*
 * Program counters on current CPU with given CPC context
 *
 * If the kernel is interposing on counters to measure hardware capacity and
 * utilization, then unprogram counters for the kernel *before* programming
 * them with the specified CPC context.
 *
 * kcpc_{program,unprogram}() may be called either directly by a thread running
 * on the target CPU or from a cross-call from another CPU. To protect
 * programming and unprogramming from being interrupted by cross-calls, callers
 * who execute kcpc_{program,unprogram} should raise PIL to the level used by
 * cross-calls.
 */
void
kcpc_program(kcpc_ctx_t *ctx, boolean_t for_thread, boolean_t cu_interpose)
{
	int	error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);
	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1)
		return;

	/*
	 * Unprogram counters for kernel measuring hardware capacity and
	 * utilization
	 */
	if (cu_interpose == B_TRUE) {
		cu_cpc_unprogram(CPU, &error);
	} else {
		kcpc_set_t *set = ctx->kc_set;
		int i;

		ASSERT(set != NULL);

		/*
		 * Since cu_interpose is false, we are programming CU context.
		 * In general, PCBE can continue from the state saved in the
		 * set, but it is not very reliable, so we start again from the
		 * preset value.
		 */
		for (i = 0; i < set->ks_nreqs; i++) {
			/*
			 * Reset the virtual counter value to the preset value.
			 */
			*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;

			/*
			 * Reset PCBE to the preset value.
			 */
			pcbe_ops->pcbe_configure(0, NULL,
			    set->ks_req[i].kr_preset,
			    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
		}
	}

	/*
	 * Program counters with specified CPC context
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Record that the counters have been programmed; this is noted
	 * differently for a thread CPC context and a CPU CPC context
	 */
	if (for_thread == B_TRUE)
		KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	else
		CPU->cpu_cpc_ctx = ctx;
}
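
/*
 * Illustrative sketch: how a caller already running on the target CPU might
 * satisfy the requirements above by disabling preemption and raising PIL to
 * the cross-call level before programming a CPU-bound context.
 * example_program_on_this_cpu() is a hypothetical name.
 */
static void
example_program_on_this_cpu(kcpc_ctx_t *ctx)
{
	int save_spl;

	kpreempt_disable();
	save_spl = spl_xcall();

	/*
	 * Not a thread context; cu_interpose of B_TRUE lets any kernel
	 * capacity/utilization use of the counters be unprogrammed first.
	 */
	kcpc_program(ctx, B_FALSE, B_TRUE);

	splx(save_spl);
	kpreempt_enable();
}
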
/*
 * Unprogram counters with given CPC context on current CPU
 *
 * If kernel is interposing on counters to measure hardware capacity and
 * utilization, then program counters for the kernel capacity and utilization
 * *after* unprogramming them for given CPC context.
 *
 * See the comment for kcpc_program regarding the synchronization with
 * cross-calls.
 */
void
kcpc_unprogram(kcpc_ctx_t *ctx, boolean_t cu_interpose)
{
	int	error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);

	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1 ||
	    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) != 0) {
		return;
	}

	/*
	 * Specified CPC context to be unprogrammed should be bound to current
	 * CPU or thread
	 */
	ASSERT(CPU->cpu_cpc_ctx == ctx || curthread->t_cpc_ctx == ctx);

	/*
	 * Stop counters
	 */
	pcbe_ops->pcbe_allstop();
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);

	/*
	 * Allow kernel to interpose on counters and program them for its own
	 * use to measure hardware capacity and utilization if cu_interpose
	 * argument is true
	 */
	if (cu_interpose == B_TRUE)
		cu_cpc_program(CPU, &error);
}

/*
 * Read CPU Performance Counter (CPC) on current CPU and call specified update
 * routine with data for each counter event currently programmed on CPU
 */
int
kcpc_read(kcpc_update_func_t update_func)
{
	kcpc_ctx_t	*ctx;
	int		i;
	kcpc_request_t	*req;
	int		retval;
	kcpc_set_t	*set;

	ASSERT(IS_HIPIL());

	/*
	 * Can't grab locks or block because may be called inside dispatcher
	 */
	kpreempt_disable();

	ctx = CPU->cpu_cpc_ctx;
	if (ctx == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Read counter data from current CPU
	 */
	pcbe_ops->pcbe_sample(ctx);

	set = ctx->kc_set;
	if (set == NULL || set->ks_req == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Call update function with preset pointer and data for each CPC
	 * event request currently programmed on current CPU
	 */
	req = set->ks_req;
	retval = 0;
	for (i = 0; i < set->ks_nreqs; i++) {
		int ret;

		if (req[i].kr_data == NULL)
			break;

		ret = update_func(req[i].kr_ptr, *req[i].kr_data);
		if (ret < 0)
			retval = ret;
	}

	kpreempt_enable();

	return (retval);
}
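
/*
 * Illustrative sketch: a minimal update callback for kcpc_read(). The
 * callback is invoked once per programmed counter event with the kr_ptr
 * registered through kcpc_reqs_add() and the current 64-bit counter value.
 * The example_counter_t type and function names are hypothetical.
 */
typedef struct example_counter {
	uint64_t	ec_value;	/* last sampled counter value */
} example_counter_t;

static int
example_update(void *ptr, uint64_t value)
{
	example_counter_t *ec = ptr;

	ec->ec_value = value;
	return (0);
}

/*
 * A caller at high PIL on the CPU of interest could then sample the
 * programmed events with:
 *
 *	(void) kcpc_read(example_update);
 */
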
/*
 * Initialize list of counter event requests
 */
kcpc_request_list_t *
kcpc_reqs_init(int nreqs, int kmem_flags)
{
	kcpc_request_list_t	*req_list;
	kcpc_request_t		*reqs;

	if (nreqs < 1)
		return (NULL);

	req_list = kmem_zalloc(sizeof (kcpc_request_list_t), kmem_flags);
	if (req_list == NULL)
		return (NULL);

	reqs = kmem_zalloc(nreqs * sizeof (kcpc_request_t), kmem_flags);
	if (reqs == NULL) {
		kmem_free(req_list, sizeof (kcpc_request_list_t));
		return (NULL);
	}

	req_list->krl_list = reqs;
	req_list->krl_cnt = 0;
	req_list->krl_max = nreqs;
	return (req_list);
}


/*
 * Add counter event request to given list of counter event requests
 */
int
kcpc_reqs_add(kcpc_request_list_t *req_list, char *event, uint64_t preset,
    uint_t flags, uint_t nattrs, kcpc_attr_t *attr, void *ptr, int kmem_flags)
{
	kcpc_request_t	*req;

	if (req_list == NULL || req_list->krl_list == NULL)
		return (-1);

	ASSERT(req_list->krl_max != 0);

	/*
	 * Allocate more space (if needed)
	 */
	if (req_list->krl_cnt > req_list->krl_max) {
		kcpc_request_t	*new;
		kcpc_request_t	*old;

		old = req_list->krl_list;
		new = kmem_zalloc((req_list->krl_max +
		    cpc_ncounters) * sizeof (kcpc_request_t), kmem_flags);
		if (new == NULL)
			return (-2);

		req_list->krl_list = new;
		bcopy(old, req_list->krl_list,
		    req_list->krl_cnt * sizeof (kcpc_request_t));
		kmem_free(old, req_list->krl_max * sizeof (kcpc_request_t));
		req_list->krl_cnt = 0;
		req_list->krl_max += cpc_ncounters;
	}

	/*
	 * Fill in request as much as possible now, but some fields will need
	 * to be set when request is assigned to a set.
	 */
	req = &req_list->krl_list[req_list->krl_cnt];
	req->kr_config = NULL;
	req->kr_picnum = -1;	/* have CPC pick this */
	req->kr_index = -1;	/* set when assigning request to set */
	req->kr_data = NULL;	/* set when configuring request */
	(void) strcpy(req->kr_event, event);
	req->kr_preset = preset;
	req->kr_flags = flags;
	req->kr_nattrs = nattrs;
	req->kr_attr = attr;
	/*
	 * Keep pointer given by caller to give to update function when this
	 * counter event is sampled/read
	 */
	req->kr_ptr = ptr;

	req_list->krl_cnt++;

	return (0);
}

/*
 * Reset list of CPC event requests so its space can be used for another set
 * of requests
 */
int
kcpc_reqs_reset(kcpc_request_list_t *req_list)
{
	/*
	 * Return when pointer to request list structure or request is NULL or
	 * when max requests is less than or equal to 0
	 */
	if (req_list == NULL || req_list->krl_list == NULL ||
	    req_list->krl_max <= 0)
		return (-1);

	/*
	 * Zero out requests and number of requests used
	 */
	bzero(req_list->krl_list, req_list->krl_max * sizeof (kcpc_request_t));
	req_list->krl_cnt = 0;
	return (0);
}

/*
 * Free given list of counter event requests
 */
int
kcpc_reqs_fini(kcpc_request_list_t *req_list)
{
	kmem_free(req_list->krl_list,
	    req_list->krl_max * sizeof (kcpc_request_t));
	kmem_free(req_list, sizeof (kcpc_request_list_t));
	return (0);
}
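
/*
 * Illustrative sketch: building and tearing down a request list with the
 * interfaces above. The "PAPI_tot_ins" event name is assumed to be one the
 * loaded PCBE supports (a real caller would pick an event reported by
 * kcpc_list_events()); example_build_reqs() and its argument are
 * hypothetical.
 */
static kcpc_request_list_t *
example_build_reqs(void *arg)
{
	kcpc_request_list_t	*req_list;

	req_list = kcpc_reqs_init(cpc_ncounters, KM_SLEEP);
	if (req_list == NULL)
		return (NULL);

	/*
	 * One event, preset 0, no flags or attributes; arg is handed back to
	 * the kcpc_read() update function when this event is sampled.
	 */
	if (kcpc_reqs_add(req_list, "PAPI_tot_ins", 0, 0, 0, NULL, arg,
	    KM_SLEEP) != 0) {
		(void) kcpc_reqs_fini(req_list);
		return (NULL);
	}

	return (req_list);
}
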
/*
 * Create set of given counter event requests
 */
static kcpc_set_t *
kcpc_set_create(kcpc_request_t *reqs, int nreqs, int set_flags, int kmem_flags)
{
	int		i;
	kcpc_set_t	*set;

	/*
	 * Allocate set and assign number of requests in set and flags
	 */
	set = kmem_zalloc(sizeof (kcpc_set_t), kmem_flags);
	if (set == NULL)
		return (NULL);

	if (nreqs < cpc_ncounters)
		set->ks_nreqs = nreqs;
	else
		set->ks_nreqs = cpc_ncounters;

	set->ks_flags = set_flags;

	/*
	 * Allocate requests needed, copy requests into set, and set index
	 * into data for each request (which may change when we assign
	 * requested counter events to counters)
	 */
	set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) *
	    set->ks_nreqs, kmem_flags);
	if (set->ks_req == NULL) {
		kmem_free(set, sizeof (kcpc_set_t));
		return (NULL);
	}

	bcopy(reqs, set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);

	for (i = 0; i < set->ks_nreqs; i++)
		set->ks_req[i].kr_index = i;

	return (set);
}


/*
 * Stop counters on current CPU.
 *
 * If preserve_context is true, the caller is interested in the CPU's CPC
 * context and wants it to be preserved.
 *
 * If preserve_context is false, the caller does not need the CPU's CPC context
 * to be preserved, so it is set to NULL.
 */
static void
kcpc_cpustop_func(boolean_t preserve_context)
{
	kpreempt_disable();

	/*
	 * Someone already stopped this context before us, so there is nothing
	 * to do.
	 */
	if (CPU->cpu_cpc_ctx == NULL) {
		kpreempt_enable();
		return;
	}

	kcpc_unprogram(CPU->cpu_cpc_ctx, B_TRUE);
	/*
	 * If CU does not use counters, then clear the CPU's CPC context.
	 * If the caller requested to preserve context it should disable CU
	 * first, so there should be no CU context now.
	 */
	ASSERT(!preserve_context || !CU_CPC_ON(CPU));
	if (!preserve_context && CPU->cpu_cpc_ctx != NULL && !CU_CPC_ON(CPU))
		CPU->cpu_cpc_ctx = NULL;

	kpreempt_enable();
}

/*
 * Stop counters on given CPU and set its CPC context to NULL unless
 * preserve_context is true.
 */
void
kcpc_cpu_stop(cpu_t *cp, boolean_t preserve_context)
{
	cpu_call(cp, (cpu_call_func_t)kcpc_cpustop_func,
	    preserve_context, 0);
}

/*
 * Program the context on the current CPU
 */
static void
kcpc_remoteprogram_func(kcpc_ctx_t *ctx, uintptr_t arg)
{
	boolean_t for_thread = (boolean_t)arg;

	ASSERT(ctx != NULL);

	kpreempt_disable();
	kcpc_program(ctx, for_thread, B_TRUE);
	kpreempt_enable();
}

/*
 * Program counters on given CPU
 */
void
kcpc_cpu_program(cpu_t *cp, kcpc_ctx_t *ctx)
{
	cpu_call(cp, (cpu_call_func_t)kcpc_remoteprogram_func, (uintptr_t)ctx,
	    (uintptr_t)B_FALSE);
}

char *
kcpc_list_attrs(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_attrs());
}

char *
kcpc_list_events(uint_t pic)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_events(pic));
}

uint_t
kcpc_pcbe_capabilities(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_caps);
}

int
kcpc_pcbe_loaded(void)
{
	return (pcbe_ops == NULL ? -1 : 0);
}
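
/*
 * Illustrative sketch: a consumer checking that a PCBE is present before
 * querying its capabilities and event names. The choice of counter 0 and the
 * example_report_pcbe() name are arbitrary; kcpc_list_events() is assumed to
 * return the PCBE's event name list for the given counter.
 */
static void
example_report_pcbe(void)
{
	if (kcpc_pcbe_loaded() != 0) {
		cmn_err(CE_NOTE, "!no PCBE loaded; CPC is unavailable");
		return;
	}

	cmn_err(CE_NOTE, "!PCBE caps 0x%x, %u counters, pic0 events: %s",
	    kcpc_pcbe_capabilities(), cpc_ncounters, kcpc_list_events(0));
}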