/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2021 Joyent, Inc.
 */

#include <sys/param.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/inttypes.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/ksynch.h>
#include <sys/systm.h>
#include <sys/kcpc.h>
#include <sys/cpc_impl.h>
#include <sys/cpc_pcbe.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
#include <sys/promif.h>
#include <sys/x_call.h>
#include <sys/cap_util.h>
#if defined(__x86)
#include <asm/clock.h>
#include <sys/xc_levels.h>
#endif

static kmutex_t kcpc_ctx_llock[CPC_HASH_BUCKETS];	/* protects ctx_list */
static kcpc_ctx_t *kcpc_ctx_list[CPC_HASH_BUCKETS];	/* head of list */


krwlock_t kcpc_cpuctx_lock;	/* lock for 'kcpc_cpuctx' below */
int kcpc_cpuctx;		/* number of cpu-specific contexts */

int kcpc_counts_include_idle = 1; /* Project Private /etc/system variable */

/*
 * These are set when a PCBE module is loaded.
 */
uint_t cpc_ncounters = 0;
pcbe_ops_t *pcbe_ops = NULL;

/*
 * Statistics on (mis)behavior
 */
static uint32_t kcpc_intrctx_count;	/* # overflows in an interrupt handler */
static uint32_t kcpc_nullctx_count;	/* # overflows in a thread with no ctx */

/*
 * By setting 'kcpc_nullctx_panic' to 1, any overflow interrupts in a thread
 * with no valid context will result in a panic.
 */
static int kcpc_nullctx_panic = 0;

static void kcpc_lwp_create(kthread_t *t, kthread_t *ct);
static void kcpc_restore(kcpc_ctx_t *ctx);
static void kcpc_save(kcpc_ctx_t *ctx);
static void kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx);
static int kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch);
static kcpc_set_t *kcpc_dup_set(kcpc_set_t *set);
static kcpc_set_t *kcpc_set_create(kcpc_request_t *reqs, int nreqs,
    int set_flags, int kmem_flags);

/*
 * Macros to manipulate context flags. All flag updates should use one of these
 * two macros.
 *
 * Flags should always be updated atomically since some of the updates are
 * not protected by locks.
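 *
 * As an illustrative sketch (not code from this file), two plain
 * read-modify-write updates racing from different contexts, e.g.
 *
 *      ctx->kc_flags |= KCPC_CTX_FREEZE;       (thread, passive level)
 *      ctx->kc_flags |= KCPC_CTX_INVALID;      (overflow or cross-call level)
 *
 * can lose one of the two bits, whereas the equivalent
 *
 *      KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
 *      KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
 *
 * leaves both bits set under any interleaving.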
93 */ 94 #define KCPC_CTX_FLAG_SET(ctx, flag) atomic_or_uint(&(ctx)->kc_flags, (flag)) 95 #define KCPC_CTX_FLAG_CLR(ctx, flag) atomic_and_uint(&(ctx)->kc_flags, ~(flag)) 96 97 /* 98 * The IS_HIPIL() macro verifies that the code is executed either from a 99 * cross-call or from high-PIL interrupt 100 */ 101 #ifdef DEBUG 102 #define IS_HIPIL() (getpil() >= XCALL_PIL) 103 #else 104 #define IS_HIPIL() 105 #endif /* DEBUG */ 106 107 108 extern int kcpc_hw_load_pcbe(void); 109 110 /* 111 * Return value from kcpc_hw_load_pcbe() 112 */ 113 static int kcpc_pcbe_error = 0; 114 115 /* 116 * Perform one-time initialization of kcpc framework. 117 * This function performs the initialization only the first time it is called. 118 * It is safe to call it multiple times. 119 */ 120 int 121 kcpc_init(void) 122 { 123 long hash; 124 static uint32_t kcpc_initialized = 0; 125 126 /* 127 * We already tried loading platform pcbe module and failed 128 */ 129 if (kcpc_pcbe_error != 0) 130 return (-1); 131 132 /* 133 * The kcpc framework should be initialized at most once 134 */ 135 if (atomic_cas_32(&kcpc_initialized, 0, 1) != 0) 136 return (0); 137 138 rw_init(&kcpc_cpuctx_lock, NULL, RW_DEFAULT, NULL); 139 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) 140 mutex_init(&kcpc_ctx_llock[hash], 141 NULL, MUTEX_DRIVER, (void *)(uintptr_t)15); 142 143 /* 144 * Load platform-specific pcbe module 145 */ 146 kcpc_pcbe_error = kcpc_hw_load_pcbe(); 147 148 return (kcpc_pcbe_error == 0 ? 0 : -1); 149 } 150 151 void 152 kcpc_register_pcbe(pcbe_ops_t *ops) 153 { 154 pcbe_ops = ops; 155 cpc_ncounters = pcbe_ops->pcbe_ncounters(); 156 } 157 158 void 159 kcpc_register_dcpc(void (*func)(uint64_t)) 160 { 161 dtrace_cpc_fire = func; 162 } 163 164 void 165 kcpc_unregister_dcpc(void) 166 { 167 dtrace_cpc_fire = NULL; 168 } 169 170 int 171 kcpc_bind_cpu(kcpc_set_t *set, processorid_t cpuid, int *subcode) 172 { 173 cpu_t *cp; 174 kcpc_ctx_t *ctx; 175 int error; 176 int save_spl; 177 178 ctx = kcpc_ctx_alloc(KM_SLEEP); 179 180 if (kcpc_assign_reqs(set, ctx) != 0) { 181 kcpc_ctx_free(ctx); 182 *subcode = CPC_RESOURCE_UNAVAIL; 183 return (EINVAL); 184 } 185 186 ctx->kc_cpuid = cpuid; 187 ctx->kc_thread = curthread; 188 189 set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP); 190 191 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) { 192 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 193 kcpc_ctx_free(ctx); 194 return (error); 195 } 196 197 set->ks_ctx = ctx; 198 ctx->kc_set = set; 199 200 /* 201 * We must hold cpu_lock to prevent DR, offlining, or unbinding while 202 * we are manipulating the cpu_t and programming the hardware, else the 203 * the cpu_t could go away while we're looking at it. 204 */ 205 mutex_enter(&cpu_lock); 206 cp = cpu_get(cpuid); 207 208 if (cp == NULL) 209 /* 210 * The CPU could have been DRd out while we were getting set up. 211 */ 212 goto unbound; 213 214 mutex_enter(&cp->cpu_cpc_ctxlock); 215 kpreempt_disable(); 216 save_spl = spl_xcall(); 217 218 /* 219 * Check to see whether counters for CPU already being used by someone 220 * other than kernel for capacity and utilization (since kernel will 221 * let go of counters for user in kcpc_program() below) 222 */ 223 if (cp->cpu_cpc_ctx != NULL && !CU_CPC_ON(cp)) { 224 /* 225 * If this CPU already has a bound set, return an error. 
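                 *
                 * For reference, a sketch of the lock and PIL ordering this
                 * function relies on while programming a CPU-bound context;
                 * the error paths below unwind it in reverse before jumping
                 * to the 'unbound' label:
                 *
                 *      mutex_enter(&cpu_lock);                 no DR/offline
                 *      mutex_enter(&cp->cpu_cpc_ctxlock);
                 *      kpreempt_disable();
                 *      save_spl = spl_xcall();                 block cross-calls
                 *      kcpc_program(ctx, B_FALSE, B_TRUE);
                 *      splx(save_spl);
                 *      kpreempt_enable();
                 *      mutex_exit(&cp->cpu_cpc_ctxlock);
                 *      mutex_exit(&cpu_lock);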
226 */ 227 splx(save_spl); 228 kpreempt_enable(); 229 mutex_exit(&cp->cpu_cpc_ctxlock); 230 goto unbound; 231 } 232 233 if (curthread->t_bind_cpu != cpuid) { 234 splx(save_spl); 235 kpreempt_enable(); 236 mutex_exit(&cp->cpu_cpc_ctxlock); 237 goto unbound; 238 } 239 240 kcpc_program(ctx, B_FALSE, B_TRUE); 241 242 splx(save_spl); 243 kpreempt_enable(); 244 245 mutex_exit(&cp->cpu_cpc_ctxlock); 246 mutex_exit(&cpu_lock); 247 248 mutex_enter(&set->ks_lock); 249 set->ks_state |= KCPC_SET_BOUND; 250 cv_signal(&set->ks_condv); 251 mutex_exit(&set->ks_lock); 252 253 return (0); 254 255 unbound: 256 mutex_exit(&cpu_lock); 257 set->ks_ctx = NULL; 258 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 259 kcpc_ctx_free(ctx); 260 return (EAGAIN); 261 } 262 263 int 264 kcpc_bind_thread(kcpc_set_t *set, kthread_t *t, int *subcode) 265 { 266 kcpc_ctx_t *ctx; 267 int error; 268 269 /* 270 * Only one set is allowed per context, so ensure there is no 271 * existing context. 272 */ 273 274 if (t->t_cpc_ctx != NULL) 275 return (EEXIST); 276 277 ctx = kcpc_ctx_alloc(KM_SLEEP); 278 279 /* 280 * The context must begin life frozen until it has been properly 281 * programmed onto the hardware. This prevents the context ops from 282 * worrying about it until we're ready. 283 */ 284 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE); 285 ctx->kc_hrtime = gethrtime(); 286 287 if (kcpc_assign_reqs(set, ctx) != 0) { 288 kcpc_ctx_free(ctx); 289 *subcode = CPC_RESOURCE_UNAVAIL; 290 return (EINVAL); 291 } 292 293 ctx->kc_cpuid = -1; 294 if (set->ks_flags & CPC_BIND_LWP_INHERIT) 295 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_LWPINHERIT); 296 ctx->kc_thread = t; 297 t->t_cpc_ctx = ctx; 298 /* 299 * Permit threads to look at their own hardware counters from userland. 300 */ 301 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_NONPRIV); 302 303 /* 304 * Create the data store for this set. 305 */ 306 set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP); 307 308 if ((error = kcpc_configure_reqs(ctx, set, subcode)) != 0) { 309 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 310 kcpc_ctx_free(ctx); 311 t->t_cpc_ctx = NULL; 312 return (error); 313 } 314 315 set->ks_ctx = ctx; 316 ctx->kc_set = set; 317 318 /* 319 * Add a device context to the subject thread. 320 */ 321 installctx(t, ctx, kcpc_save, kcpc_restore, NULL, 322 kcpc_lwp_create, NULL, kcpc_free, NULL); 323 324 /* 325 * Ask the backend to program the hardware. 326 */ 327 if (t == curthread) { 328 int save_spl; 329 330 kpreempt_disable(); 331 save_spl = spl_xcall(); 332 kcpc_program(ctx, B_TRUE, B_TRUE); 333 splx(save_spl); 334 kpreempt_enable(); 335 } else { 336 /* 337 * Since we are the agent LWP, we know the victim LWP is stopped 338 * until we're done here; no need to worry about preemption or 339 * migration here. We still use an atomic op to clear the flag 340 * to ensure the flags are always self-consistent; they can 341 * still be accessed from, for instance, another CPU doing a 342 * kcpc_invalidate_all(). 343 */ 344 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 345 } 346 347 mutex_enter(&set->ks_lock); 348 set->ks_state |= KCPC_SET_BOUND; 349 cv_signal(&set->ks_condv); 350 mutex_exit(&set->ks_lock); 351 352 return (0); 353 } 354 355 /* 356 * Walk through each request in the set and ask the PCBE to configure a 357 * corresponding counter. 
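 *
 * For context, a condensed sketch of the thread-bind path that leads here
 * (simplified from kcpc_bind_thread() above; error handling omitted):
 *
 *      ctx = kcpc_ctx_alloc(KM_SLEEP);
 *      KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);        frozen until programmed
 *      (void) kcpc_assign_reqs(set, ctx);              pick a counter per req
 *      set->ks_data = kmem_alloc(set->ks_nreqs * sizeof (uint64_t), KM_SLEEP);
 *      (void) kcpc_configure_reqs(ctx, set, &subcode); build PCBE configs
 *      installctx(t, ctx, kcpc_save, kcpc_restore, NULL,
 *          kcpc_lwp_create, NULL, kcpc_free, NULL);
 *      kcpc_program(ctx, B_TRUE, B_TRUE);              at spl_xcall()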
358 */ 359 int 360 kcpc_configure_reqs(kcpc_ctx_t *ctx, kcpc_set_t *set, int *subcode) 361 { 362 int i; 363 int ret; 364 kcpc_request_t *rp; 365 366 for (i = 0; i < set->ks_nreqs; i++) { 367 int n; 368 rp = &set->ks_req[i]; 369 370 n = rp->kr_picnum; 371 372 ASSERT(n >= 0 && n < cpc_ncounters); 373 374 ASSERT(ctx->kc_pics[n].kp_req == NULL); 375 376 if (rp->kr_flags & CPC_OVF_NOTIFY_EMT) { 377 if ((pcbe_ops->pcbe_caps & CPC_CAP_OVERFLOW_INTERRUPT) 378 == 0) { 379 *subcode = -1; 380 return (ENOTSUP); 381 } 382 /* 383 * If any of the counters have requested overflow 384 * notification, we flag the context as being one that 385 * cares about overflow. 386 */ 387 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_SIGOVF); 388 } 389 390 rp->kr_config = NULL; 391 if ((ret = pcbe_ops->pcbe_configure(n, rp->kr_event, 392 rp->kr_preset, rp->kr_flags, rp->kr_nattrs, rp->kr_attr, 393 &(rp->kr_config), (void *)ctx)) != 0) { 394 kcpc_free_configs(set); 395 *subcode = ret; 396 switch (ret) { 397 case CPC_ATTR_REQUIRES_PRIVILEGE: 398 case CPC_HV_NO_ACCESS: 399 return (EACCES); 400 default: 401 return (EINVAL); 402 } 403 } 404 405 ctx->kc_pics[n].kp_req = rp; 406 rp->kr_picp = &ctx->kc_pics[n]; 407 rp->kr_data = set->ks_data + rp->kr_index; 408 *rp->kr_data = rp->kr_preset; 409 } 410 411 return (0); 412 } 413 414 void 415 kcpc_free_configs(kcpc_set_t *set) 416 { 417 int i; 418 419 for (i = 0; i < set->ks_nreqs; i++) 420 if (set->ks_req[i].kr_config != NULL) 421 pcbe_ops->pcbe_free(set->ks_req[i].kr_config); 422 } 423 424 /* 425 * buf points to a user address and the data should be copied out to that 426 * address in the current process. 427 */ 428 int 429 kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick) 430 { 431 kcpc_ctx_t *ctx = set->ks_ctx; 432 int save_spl; 433 434 mutex_enter(&set->ks_lock); 435 if ((set->ks_state & KCPC_SET_BOUND) == 0) { 436 mutex_exit(&set->ks_lock); 437 return (EINVAL); 438 } 439 mutex_exit(&set->ks_lock); 440 441 /* 442 * Kernel preemption must be disabled while reading the hardware regs, 443 * and if this is a CPU-bound context, while checking the CPU binding of 444 * the current thread. 445 */ 446 kpreempt_disable(); 447 save_spl = spl_xcall(); 448 449 if (ctx->kc_flags & KCPC_CTX_INVALID) { 450 splx(save_spl); 451 kpreempt_enable(); 452 return (EAGAIN); 453 } 454 455 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) { 456 if (ctx->kc_cpuid != -1) { 457 if (curthread->t_bind_cpu != ctx->kc_cpuid) { 458 splx(save_spl); 459 kpreempt_enable(); 460 return (EAGAIN); 461 } 462 } 463 464 if (ctx->kc_thread == curthread) { 465 uint64_t curtick = KCPC_GET_TICK(); 466 467 ctx->kc_hrtime = gethrtime_waitfree(); 468 pcbe_ops->pcbe_sample(ctx); 469 ctx->kc_vtick += curtick - ctx->kc_rawtick; 470 ctx->kc_rawtick = curtick; 471 } 472 473 /* 474 * The config may have been invalidated by 475 * the pcbe_sample op. 476 */ 477 if (ctx->kc_flags & KCPC_CTX_INVALID) { 478 splx(save_spl); 479 kpreempt_enable(); 480 return (EAGAIN); 481 } 482 483 } 484 485 splx(save_spl); 486 kpreempt_enable(); 487 488 if (copyout(set->ks_data, buf, 489 set->ks_nreqs * sizeof (uint64_t)) == -1) 490 return (EFAULT); 491 if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1) 492 return (EFAULT); 493 if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1) 494 return (EFAULT); 495 496 return (0); 497 } 498 499 /* 500 * Stop the counters on the CPU this context is bound to. 
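 *
 * (Aside: once bound and programmed, a set is read back by kcpc_sample()
 * above, which condenses to the following sketch:
 *
 *      pcbe_ops->pcbe_sample(ctx);             refresh ks_data[] in place
 *      copyout(set->ks_data, buf, set->ks_nreqs * sizeof (uint64_t));
 *      copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t));
 *      copyout(&ctx->kc_vtick, tick, sizeof (uint64_t));
 *
 * so each request's virtualized 64-bit count is found at
 * ks_data[kr_index].)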
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
        cpu_t *cp;

        kpreempt_disable();

        if (ctx->kc_cpuid == CPU->cpu_id) {
                cp = CPU;
        } else {
                cp = cpu_get(ctx->kc_cpuid);
        }

        ASSERT(cp != NULL && cp->cpu_cpc_ctx == ctx);
        kcpc_cpu_stop(cp, B_FALSE);

        kpreempt_enable();
}

int
kcpc_unbind(kcpc_set_t *set)
{
        kcpc_ctx_t *ctx;
        kthread_t *t;

        /*
         * We could be racing with the process's agent thread as it
         * binds the set; we must wait for the set to finish binding
         * before attempting to tear it down.
         */
        mutex_enter(&set->ks_lock);
        while ((set->ks_state & KCPC_SET_BOUND) == 0)
                cv_wait(&set->ks_condv, &set->ks_lock);
        mutex_exit(&set->ks_lock);

        ctx = set->ks_ctx;

        /*
         * Use kc_lock to synchronize with kcpc_restore().
         */
        mutex_enter(&ctx->kc_lock);
        KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID);
        mutex_exit(&ctx->kc_lock);

        if (ctx->kc_cpuid == -1) {
                t = ctx->kc_thread;
                /*
                 * The context is thread-bound and therefore has a device
                 * context. It will be freed via removectx() calling
                 * freectx() calling kcpc_free().
                 */
                if (t == curthread) {
                        int save_spl;

                        kpreempt_disable();
                        save_spl = spl_xcall();
                        if (!(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED))
                                kcpc_unprogram(ctx, B_TRUE);
                        splx(save_spl);
                        kpreempt_enable();
                }
#ifdef DEBUG
                if (removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
                    kcpc_lwp_create, NULL, kcpc_free) == 0)
                        panic("kcpc_unbind: context %p not present on "
                            "thread %p", (void *)ctx, (void *)t);
#else
                (void) removectx(t, ctx, kcpc_save, kcpc_restore, NULL,
                    kcpc_lwp_create, NULL, kcpc_free);
#endif /* DEBUG */
                t->t_cpc_set = NULL;
                t->t_cpc_ctx = NULL;
        } else {
                /*
                 * If we are unbinding a CPU-bound set from a remote CPU, the
                 * native CPU's idle thread could be in the midst of programming
                 * this context onto the CPU. We grab the context's lock here to
                 * ensure that the idle thread is done with it. When we release
                 * the lock, the CPU no longer has a context and the idle thread
                 * will move on.
                 *
                 * cpu_lock must be held to prevent the CPU from being DR'd out
                 * while we disassociate the context from the cpu_t.
                 */
                cpu_t *cp;
                mutex_enter(&cpu_lock);
                cp = cpu_get(ctx->kc_cpuid);
                if (cp != NULL) {
                        /*
                         * The CPU may have been DR'd out of the system.
592 */ 593 mutex_enter(&cp->cpu_cpc_ctxlock); 594 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) 595 kcpc_stop_hw(ctx); 596 ASSERT(ctx->kc_flags & KCPC_CTX_INVALID_STOPPED); 597 mutex_exit(&cp->cpu_cpc_ctxlock); 598 } 599 mutex_exit(&cpu_lock); 600 if (ctx->kc_thread == curthread) { 601 kcpc_free(ctx, 0); 602 curthread->t_cpc_set = NULL; 603 } 604 } 605 606 return (0); 607 } 608 609 int 610 kcpc_preset(kcpc_set_t *set, int index, uint64_t preset) 611 { 612 int i; 613 614 ASSERT(set != NULL); 615 ASSERT(set->ks_state & KCPC_SET_BOUND); 616 ASSERT(set->ks_ctx->kc_thread == curthread); 617 ASSERT(set->ks_ctx->kc_cpuid == -1); 618 619 if (index < 0 || index >= set->ks_nreqs) 620 return (EINVAL); 621 622 for (i = 0; i < set->ks_nreqs; i++) 623 if (set->ks_req[i].kr_index == index) 624 break; 625 ASSERT(i != set->ks_nreqs); 626 627 set->ks_req[i].kr_preset = preset; 628 return (0); 629 } 630 631 int 632 kcpc_restart(kcpc_set_t *set) 633 { 634 kcpc_ctx_t *ctx = set->ks_ctx; 635 int i; 636 int save_spl; 637 638 ASSERT(set->ks_state & KCPC_SET_BOUND); 639 ASSERT(ctx->kc_thread == curthread); 640 ASSERT(ctx->kc_cpuid == -1); 641 642 for (i = 0; i < set->ks_nreqs; i++) { 643 *(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset; 644 pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset, 645 0, 0, NULL, &set->ks_req[i].kr_config, NULL); 646 } 647 648 kpreempt_disable(); 649 save_spl = spl_xcall(); 650 651 /* 652 * If the user is doing this on a running set, make sure the counters 653 * are stopped first. 654 */ 655 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) 656 pcbe_ops->pcbe_allstop(); 657 658 /* 659 * Ask the backend to program the hardware. 660 */ 661 ctx->kc_rawtick = KCPC_GET_TICK(); 662 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 663 pcbe_ops->pcbe_program(ctx); 664 splx(save_spl); 665 kpreempt_enable(); 666 667 return (0); 668 } 669 670 /* 671 * Caller must hold kcpc_cpuctx_lock. 672 */ 673 int 674 kcpc_enable(kthread_t *t, int cmd, int enable) 675 { 676 kcpc_ctx_t *ctx = t->t_cpc_ctx; 677 kcpc_set_t *set = t->t_cpc_set; 678 kcpc_set_t *newset; 679 int i; 680 int flag; 681 int err; 682 683 ASSERT(RW_READ_HELD(&kcpc_cpuctx_lock)); 684 685 if (ctx == NULL) { 686 /* 687 * This thread has a set but no context; it must be a 688 * CPU-bound set. 689 */ 690 ASSERT(t->t_cpc_set != NULL); 691 ASSERT(t->t_cpc_set->ks_ctx->kc_cpuid != -1); 692 return (EINVAL); 693 } else if (ctx->kc_flags & KCPC_CTX_INVALID) 694 return (EAGAIN); 695 696 if (cmd == CPC_ENABLE) { 697 if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) 698 return (EINVAL); 699 kpreempt_disable(); 700 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 701 kcpc_restore(ctx); 702 kpreempt_enable(); 703 } else if (cmd == CPC_DISABLE) { 704 if (ctx->kc_flags & KCPC_CTX_FREEZE) 705 return (EINVAL); 706 kpreempt_disable(); 707 kcpc_save(ctx); 708 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE); 709 kpreempt_enable(); 710 } else if (cmd == CPC_USR_EVENTS || cmd == CPC_SYS_EVENTS) { 711 /* 712 * Strategy for usr/sys: stop counters and update set's presets 713 * with current counter values, unbind, update requests with 714 * new config, then re-bind. 715 */ 716 flag = (cmd == CPC_USR_EVENTS) ? 
717 CPC_COUNT_USER: CPC_COUNT_SYSTEM; 718 719 kpreempt_disable(); 720 KCPC_CTX_FLAG_SET(ctx, 721 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED); 722 pcbe_ops->pcbe_allstop(); 723 kpreempt_enable(); 724 725 for (i = 0; i < set->ks_nreqs; i++) { 726 set->ks_req[i].kr_preset = *(set->ks_req[i].kr_data); 727 if (enable) 728 set->ks_req[i].kr_flags |= flag; 729 else 730 set->ks_req[i].kr_flags &= ~flag; 731 } 732 newset = kcpc_dup_set(set); 733 if (kcpc_unbind(set) != 0) 734 return (EINVAL); 735 t->t_cpc_set = newset; 736 if (kcpc_bind_thread(newset, t, &err) != 0) { 737 t->t_cpc_set = NULL; 738 kcpc_free_set(newset); 739 return (EINVAL); 740 } 741 } else 742 return (EINVAL); 743 744 return (0); 745 } 746 747 /* 748 * Provide PCBEs with a way of obtaining the configs of every counter which will 749 * be programmed together. 750 * 751 * If current is NULL, provide the first config. 752 * 753 * If data != NULL, caller wants to know where the data store associated with 754 * the config we return is located. 755 */ 756 void * 757 kcpc_next_config(void *token, void *current, uint64_t **data) 758 { 759 int i; 760 kcpc_pic_t *pic; 761 kcpc_ctx_t *ctx = (kcpc_ctx_t *)token; 762 763 if (current == NULL) { 764 /* 765 * Client would like the first config, which may not be in 766 * counter 0; we need to search through the counters for the 767 * first config. 768 */ 769 for (i = 0; i < cpc_ncounters; i++) 770 if (ctx->kc_pics[i].kp_req != NULL) 771 break; 772 /* 773 * There are no counters configured for the given context. 774 */ 775 if (i == cpc_ncounters) 776 return (NULL); 777 } else { 778 /* 779 * There surely is a faster way to do this. 780 */ 781 for (i = 0; i < cpc_ncounters; i++) { 782 pic = &ctx->kc_pics[i]; 783 784 if (pic->kp_req != NULL && 785 current == pic->kp_req->kr_config) 786 break; 787 } 788 789 /* 790 * We found the current config at picnum i. Now search for the 791 * next configured PIC. 792 */ 793 for (i++; i < cpc_ncounters; i++) { 794 pic = &ctx->kc_pics[i]; 795 if (pic->kp_req != NULL) 796 break; 797 } 798 799 if (i == cpc_ncounters) 800 return (NULL); 801 } 802 803 if (data != NULL) { 804 *data = ctx->kc_pics[i].kp_req->kr_data; 805 } 806 807 return (ctx->kc_pics[i].kp_req->kr_config); 808 } 809 810 811 kcpc_ctx_t * 812 kcpc_ctx_alloc(int kmem_flags) 813 { 814 kcpc_ctx_t *ctx; 815 long hash; 816 817 ctx = (kcpc_ctx_t *)kmem_zalloc(sizeof (kcpc_ctx_t), kmem_flags); 818 if (ctx == NULL) 819 return (NULL); 820 821 hash = CPC_HASH_CTX(ctx); 822 mutex_enter(&kcpc_ctx_llock[hash]); 823 ctx->kc_next = kcpc_ctx_list[hash]; 824 kcpc_ctx_list[hash] = ctx; 825 mutex_exit(&kcpc_ctx_llock[hash]); 826 827 ctx->kc_pics = (kcpc_pic_t *)kmem_zalloc(sizeof (kcpc_pic_t) * 828 cpc_ncounters, KM_SLEEP); 829 830 ctx->kc_cpuid = -1; 831 832 return (ctx); 833 } 834 835 /* 836 * Copy set from ctx to the child context, cctx, if it has CPC_BIND_LWP_INHERIT 837 * in the flags. 
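 *
 * (Aside: a PCBE that needs to see every configuration that will be
 * programmed together can walk them with kcpc_next_config() above; a
 * minimal usage sketch, where 'token' is the kcpc_ctx_t handed to
 * pcbe_configure():
 *
 *      uint64_t *data;
 *      void *cfg;
 *
 *      for (cfg = kcpc_next_config(token, NULL, &data); cfg != NULL;
 *          cfg = kcpc_next_config(token, cfg, &data)) {
 *              ... examine cfg; data points at the slot its count lands in ...
 *      }
 *
 * This is only an illustration, not an additional interface.)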
838 */ 839 static void 840 kcpc_ctx_clone(kcpc_ctx_t *ctx, kcpc_ctx_t *cctx) 841 { 842 kcpc_set_t *ks = ctx->kc_set, *cks; 843 int i, j; 844 int code; 845 846 ASSERT(ks != NULL); 847 848 if ((ks->ks_flags & CPC_BIND_LWP_INHERIT) == 0) 849 return; 850 851 cks = kmem_zalloc(sizeof (*cks), KM_SLEEP); 852 cks->ks_state &= ~KCPC_SET_BOUND; 853 cctx->kc_set = cks; 854 cks->ks_flags = ks->ks_flags; 855 cks->ks_nreqs = ks->ks_nreqs; 856 cks->ks_req = kmem_alloc(cks->ks_nreqs * 857 sizeof (kcpc_request_t), KM_SLEEP); 858 cks->ks_data = kmem_alloc(cks->ks_nreqs * sizeof (uint64_t), 859 KM_SLEEP); 860 cks->ks_ctx = cctx; 861 862 for (i = 0; i < cks->ks_nreqs; i++) { 863 cks->ks_req[i].kr_index = ks->ks_req[i].kr_index; 864 cks->ks_req[i].kr_picnum = ks->ks_req[i].kr_picnum; 865 (void) strncpy(cks->ks_req[i].kr_event, 866 ks->ks_req[i].kr_event, CPC_MAX_EVENT_LEN); 867 cks->ks_req[i].kr_preset = ks->ks_req[i].kr_preset; 868 cks->ks_req[i].kr_flags = ks->ks_req[i].kr_flags; 869 cks->ks_req[i].kr_nattrs = ks->ks_req[i].kr_nattrs; 870 if (ks->ks_req[i].kr_nattrs > 0) { 871 cks->ks_req[i].kr_attr = 872 kmem_alloc(ks->ks_req[i].kr_nattrs * 873 sizeof (kcpc_attr_t), KM_SLEEP); 874 } 875 for (j = 0; j < ks->ks_req[i].kr_nattrs; j++) { 876 (void) strncpy(cks->ks_req[i].kr_attr[j].ka_name, 877 ks->ks_req[i].kr_attr[j].ka_name, 878 CPC_MAX_ATTR_LEN); 879 cks->ks_req[i].kr_attr[j].ka_val = 880 ks->ks_req[i].kr_attr[j].ka_val; 881 } 882 } 883 if (kcpc_configure_reqs(cctx, cks, &code) != 0) 884 kcpc_invalidate_config(cctx); 885 886 mutex_enter(&cks->ks_lock); 887 cks->ks_state |= KCPC_SET_BOUND; 888 cv_signal(&cks->ks_condv); 889 mutex_exit(&cks->ks_lock); 890 } 891 892 893 void 894 kcpc_ctx_free(kcpc_ctx_t *ctx) 895 { 896 kcpc_ctx_t **loc; 897 long hash = CPC_HASH_CTX(ctx); 898 899 mutex_enter(&kcpc_ctx_llock[hash]); 900 loc = &kcpc_ctx_list[hash]; 901 ASSERT(*loc != NULL); 902 while (*loc != ctx) 903 loc = &(*loc)->kc_next; 904 *loc = ctx->kc_next; 905 mutex_exit(&kcpc_ctx_llock[hash]); 906 907 kmem_free(ctx->kc_pics, cpc_ncounters * sizeof (kcpc_pic_t)); 908 cv_destroy(&ctx->kc_condv); 909 mutex_destroy(&ctx->kc_lock); 910 kmem_free(ctx, sizeof (*ctx)); 911 } 912 913 /* 914 * Generic interrupt handler used on hardware that generates 915 * overflow interrupts. 916 * 917 * Note: executed at high-level interrupt context! 918 */ 919 /*ARGSUSED*/ 920 kcpc_ctx_t * 921 kcpc_overflow_intr(caddr_t arg, uint64_t bitmap) 922 { 923 kcpc_ctx_t *ctx; 924 kthread_t *t = curthread; 925 int i; 926 927 /* 928 * On both x86 and UltraSPARC, we may deliver the high-level 929 * interrupt in kernel mode, just after we've started to run an 930 * interrupt thread. (That's because the hardware helpfully 931 * delivers the overflow interrupt some random number of cycles 932 * after the instruction that caused the overflow by which time 933 * we're in some part of the kernel, not necessarily running on 934 * the right thread). 935 * 936 * Check for this case here -- find the pinned thread 937 * that was running when the interrupt went off. 938 */ 939 if (t->t_flag & T_INTR_THREAD) { 940 klwp_t *lwp; 941 942 atomic_inc_32(&kcpc_intrctx_count); 943 944 /* 945 * Note that t_lwp is always set to point at the underlying 946 * thread, thus this will work in the presence of nested 947 * interrupts. 
                 */
                ctx = NULL;
                if ((lwp = t->t_lwp) != NULL) {
                        t = lwptot(lwp);
                        ctx = t->t_cpc_ctx;
                }
        } else
                ctx = t->t_cpc_ctx;

        if (ctx == NULL) {
                /*
                 * This can easily happen if we're using the counters in
                 * "shared" mode, for example, and an overflow interrupt
                 * occurs while we are running cpustat. In that case, the
                 * bound thread that has the context that belongs to this
                 * CPU is almost certainly sleeping (if it was running on
                 * the CPU we'd have found it above), and the actual
                 * interrupted thread has no knowledge of performance counters!
                 */
                ctx = curthread->t_cpu->cpu_cpc_ctx;
                if (ctx != NULL) {
                        /*
                         * Return the bound context for this CPU to
                         * the interrupt handler so that it can synchronously
                         * sample the hardware counters and restart them.
                         */
                        return (ctx);
                }

                /*
                 * As long as the overflow interrupt really is delivered early
                 * enough after trapping into the kernel to avoid switching
                 * threads, we must always be able to find the cpc context,
                 * or something went terribly wrong, i.e. we ended up
                 * running a passivated interrupt thread, a kernel
                 * thread, or we interrupted idle, all of which are Very Bad.
                 *
                 * We also could end up here owing to an incredibly unlikely
                 * race condition that exists on x86 based architectures when
                 * the cpc provider is in use; overflow interrupts are directed
                 * to the cpc provider if the 'dtrace_cpc_in_use' variable is
                 * set when we enter the handler. This variable is unset after
                 * overflow interrupts have been disabled on all CPUs and all
                 * contexts have been torn down. To stop interrupts, the cpc
                 * provider issues an xcall to the remote CPU before it tears
                 * down that CPU's context. As high priority xcalls, on an x86
                 * architecture, execute at a higher PIL than this handler, it
                 * is possible (though extremely unlikely) that the xcall could
                 * interrupt the overflow handler before the handler has
                 * checked the 'dtrace_cpc_in_use' variable, stop the counters,
                 * and return to the cpc provider, which could then rip down
                 * contexts and unset 'dtrace_cpc_in_use' *before* the CPU's
                 * overflow handler has had a chance to check the variable. In
                 * that case, the handler would direct the overflow into this
                 * code and no valid context would be found. The default
                 * behavior when no valid context is found is now to shout a
                 * warning to the console and bump the 'kcpc_nullctx_count'
                 * variable.
                 */
                if (kcpc_nullctx_panic)
                        panic("null cpc context, thread %p", (void *)t);
#ifdef DEBUG
                cmn_err(CE_NOTE,
                    "null cpc context found in overflow handler!\n");
#endif
                atomic_inc_32(&kcpc_nullctx_count);
        } else if ((ctx->kc_flags & KCPC_CTX_INVALID) == 0) {
                /*
                 * Schedule an ast to sample the counters, which will
                 * propagate any overflow into the virtualized performance
                 * counter(s), and may deliver a signal.
                 */
                ttolwp(t)->lwp_pcb.pcb_flags |= CPC_OVERFLOW;
                /*
                 * If a counter has overflowed which was counting on behalf of
                 * a request which specified CPC_OVF_NOTIFY_EMT, send the
                 * process a signal.
                 */
                for (i = 0; i < cpc_ncounters; i++) {
                        if (ctx->kc_pics[i].kp_req != NULL &&
                            bitmap & (1 << i) &&
                            ctx->kc_pics[i].kp_req->kr_flags &
                            CPC_OVF_NOTIFY_EMT) {
                                /*
                                 * A signal has been requested for this PIC,
                                 * so freeze the context. The interrupt handler
                                 * has already stopped the counter hardware.
                                 */
                                KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE);
                                atomic_or_uint(&ctx->kc_pics[i].kp_flags,
                                    KCPC_PIC_OVERFLOWED);
                        }
                }
                aston(t);
        } else if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) {
                /*
                 * Thread context is no longer valid, but there may be a valid
                 * CPU context.
                 */
                return (curthread->t_cpu->cpu_cpc_ctx);
        }

        return (NULL);
}

/*
 * The current thread context had an overflow interrupt; we're
 * executing here in high-level interrupt context.
 */
/*ARGSUSED*/
uint_t
kcpc_hw_overflow_intr(caddr_t arg1, caddr_t arg2)
{
        kcpc_ctx_t *ctx;
        uint64_t bitmap;
        uint8_t *state;
        int save_spl;

        if (pcbe_ops == NULL ||
            (bitmap = pcbe_ops->pcbe_overflow_bitmap()) == 0)
                return (DDI_INTR_UNCLAIMED);

        /*
         * Prevent any further interrupts.
         */
        pcbe_ops->pcbe_allstop();

        if (dtrace_cpc_in_use) {
                state = &cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state;

                /*
                 * Set the per-CPU state bit to indicate that we are currently
                 * processing an interrupt if it is currently free. Drop the
                 * interrupt if the state isn't free (i.e. a configuration
                 * event is taking place).
                 */
                if (atomic_cas_8(state, DCPC_INTR_FREE,
                    DCPC_INTR_PROCESSING) == DCPC_INTR_FREE) {
                        int i;
                        kcpc_request_t req;

                        ASSERT(dtrace_cpc_fire != NULL);

                        (*dtrace_cpc_fire)(bitmap);

                        ctx = curthread->t_cpu->cpu_cpc_ctx;
                        if (ctx == NULL) {
#ifdef DEBUG
                                cmn_err(CE_NOTE, "null cpc context in "
                                    "hardware overflow handler!\n");
#endif
                                return (DDI_INTR_CLAIMED);
                        }

                        /* Reset any counters that have overflowed */
                        for (i = 0; i < ctx->kc_set->ks_nreqs; i++) {
                                req = ctx->kc_set->ks_req[i];

                                if (bitmap & (1 << req.kr_picnum)) {
                                        pcbe_ops->pcbe_configure(req.kr_picnum,
                                            req.kr_event, req.kr_preset,
                                            req.kr_flags, req.kr_nattrs,
                                            req.kr_attr, &(req.kr_config),
                                            (void *)ctx);
                                }
                        }
                        pcbe_ops->pcbe_program(ctx);

                        /*
                         * We've finished processing the interrupt so set
                         * the state back to free.
                         */
                        cpu_core[CPU->cpu_id].cpuc_dcpc_intr_state =
                            DCPC_INTR_FREE;
                        membar_producer();
                }
                return (DDI_INTR_CLAIMED);
        }

        /*
         * DTrace isn't involved, so pass on accordingly.
         *
         * If the interrupt has occurred in the context of an lwp owning
         * the counters, then the handler posts an AST to the lwp to
         * trigger the actual sampling, and optionally deliver a signal or
         * restart the counters, on the way out of the kernel using
         * kcpc_hw_overflow_ast() (see below).
         *
         * On the other hand, if the handler returns the context to us
         * directly, then it means that there are no other threads in
         * the middle of updating it, no AST has been posted, and so we
         * should sample the counters here, and restart them with no
         * further fuss.
         *
         * The CPU's CPC context may disappear as a result of a cross-call,
         * which has a higher PIL on x86, so protect the context by raising
         * PIL to the cross-call level.
1144 */ 1145 save_spl = spl_xcall(); 1146 if ((ctx = kcpc_overflow_intr(arg1, bitmap)) != NULL) { 1147 uint64_t curtick = KCPC_GET_TICK(); 1148 1149 ctx->kc_hrtime = gethrtime_waitfree(); 1150 ctx->kc_vtick += curtick - ctx->kc_rawtick; 1151 ctx->kc_rawtick = curtick; 1152 pcbe_ops->pcbe_sample(ctx); 1153 pcbe_ops->pcbe_program(ctx); 1154 } 1155 splx(save_spl); 1156 1157 return (DDI_INTR_CLAIMED); 1158 } 1159 1160 /* 1161 * Called from trap() when processing the ast posted by the high-level 1162 * interrupt handler. 1163 */ 1164 int 1165 kcpc_overflow_ast() 1166 { 1167 kcpc_ctx_t *ctx = curthread->t_cpc_ctx; 1168 int i; 1169 int found = 0; 1170 uint64_t curtick = KCPC_GET_TICK(); 1171 1172 ASSERT(ctx != NULL); /* Beware of interrupt skid. */ 1173 1174 /* 1175 * An overflow happened: sample the context to ensure that 1176 * the overflow is propagated into the upper bits of the 1177 * virtualized 64-bit counter(s). 1178 */ 1179 kpreempt_disable(); 1180 ctx->kc_hrtime = gethrtime_waitfree(); 1181 pcbe_ops->pcbe_sample(ctx); 1182 kpreempt_enable(); 1183 1184 ctx->kc_vtick += curtick - ctx->kc_rawtick; 1185 1186 /* 1187 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED 1188 * if that pic generated an overflow and if the request it was counting 1189 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all 1190 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we 1191 * found any overflowed pics, keep the context frozen and return true 1192 * (thus causing a signal to be sent). 1193 */ 1194 for (i = 0; i < cpc_ncounters; i++) { 1195 if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) { 1196 atomic_and_uint(&ctx->kc_pics[i].kp_flags, 1197 ~KCPC_PIC_OVERFLOWED); 1198 found = 1; 1199 } 1200 } 1201 if (found) 1202 return (1); 1203 1204 /* 1205 * Otherwise, re-enable the counters and continue life as before. 1206 */ 1207 kpreempt_disable(); 1208 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE); 1209 pcbe_ops->pcbe_program(ctx); 1210 kpreempt_enable(); 1211 return (0); 1212 } 1213 1214 /* 1215 * Called when switching away from current thread. 1216 */ 1217 static void 1218 kcpc_save(kcpc_ctx_t *ctx) 1219 { 1220 int err; 1221 int save_spl; 1222 1223 kpreempt_disable(); 1224 save_spl = spl_xcall(); 1225 1226 if (ctx->kc_flags & KCPC_CTX_INVALID) { 1227 if (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) { 1228 splx(save_spl); 1229 kpreempt_enable(); 1230 return; 1231 } 1232 /* 1233 * This context has been invalidated but the counters have not 1234 * been stopped. Stop them here and mark the context stopped. 1235 */ 1236 kcpc_unprogram(ctx, B_TRUE); 1237 splx(save_spl); 1238 kpreempt_enable(); 1239 return; 1240 } 1241 1242 pcbe_ops->pcbe_allstop(); 1243 if (ctx->kc_flags & KCPC_CTX_FREEZE) { 1244 splx(save_spl); 1245 kpreempt_enable(); 1246 return; 1247 } 1248 1249 /* 1250 * Need to sample for all reqs into each req's current mpic. 
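         *
         * kc_rawtick holds the raw tick value captured when this context was
         * last programmed, so the virtualized timebase only accumulates the
         * ticks spent with the context actually on the hardware:
         *
         *      kc_vtick += KCPC_GET_TICK() - kc_rawtick;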
1251 */ 1252 ctx->kc_hrtime = gethrtime_waitfree(); 1253 ctx->kc_vtick += KCPC_GET_TICK() - ctx->kc_rawtick; 1254 pcbe_ops->pcbe_sample(ctx); 1255 1256 /* 1257 * Program counter for measuring capacity and utilization since user 1258 * thread isn't using counter anymore 1259 */ 1260 ASSERT(ctx->kc_cpuid == -1); 1261 cu_cpc_program(CPU, &err); 1262 splx(save_spl); 1263 kpreempt_enable(); 1264 } 1265 1266 static void 1267 kcpc_restore(kcpc_ctx_t *ctx) 1268 { 1269 int save_spl; 1270 1271 mutex_enter(&ctx->kc_lock); 1272 1273 if ((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED)) == 1274 KCPC_CTX_INVALID) { 1275 /* 1276 * The context is invalidated but has not been marked stopped. 1277 * We mark it as such here because we will not start the 1278 * counters during this context switch. 1279 */ 1280 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED); 1281 } 1282 1283 if (ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_FREEZE)) { 1284 mutex_exit(&ctx->kc_lock); 1285 return; 1286 } 1287 1288 /* 1289 * Set kc_flags to show that a kcpc_restore() is in progress to avoid 1290 * ctx & set related memory objects being freed without us knowing. 1291 * This can happen if an agent thread is executing a kcpc_unbind(), 1292 * with this thread as the target, whilst we're concurrently doing a 1293 * restorectx() during, for example, a proc_exit(). Effectively, by 1294 * doing this, we're asking kcpc_free() to cv_wait() until 1295 * kcpc_restore() has completed. 1296 */ 1297 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_RESTORE); 1298 mutex_exit(&ctx->kc_lock); 1299 1300 /* 1301 * While programming the hardware, the counters should be stopped. We 1302 * don't do an explicit pcbe_allstop() here because they should have 1303 * been stopped already by the last consumer. 1304 */ 1305 kpreempt_disable(); 1306 save_spl = spl_xcall(); 1307 kcpc_program(ctx, B_TRUE, B_TRUE); 1308 splx(save_spl); 1309 kpreempt_enable(); 1310 1311 /* 1312 * Wake the agent thread if it's waiting in kcpc_free(). 1313 */ 1314 mutex_enter(&ctx->kc_lock); 1315 KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_RESTORE); 1316 cv_signal(&ctx->kc_condv); 1317 mutex_exit(&ctx->kc_lock); 1318 } 1319 1320 /* 1321 * If kcpc_counts_include_idle is set to 0 by the sys admin, we add the the 1322 * following context operators to the idle thread on each CPU. They stop the 1323 * counters when the idle thread is switched on, and they start them again when 1324 * it is switched off. 1325 */ 1326 /*ARGSUSED*/ 1327 void 1328 kcpc_idle_save(struct cpu *cp) 1329 { 1330 /* 1331 * The idle thread shouldn't be run anywhere else. 1332 */ 1333 ASSERT(CPU == cp); 1334 1335 /* 1336 * We must hold the CPU's context lock to ensure the context isn't freed 1337 * while we're looking at it. 1338 */ 1339 mutex_enter(&cp->cpu_cpc_ctxlock); 1340 1341 if ((cp->cpu_cpc_ctx == NULL) || 1342 (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) { 1343 mutex_exit(&cp->cpu_cpc_ctxlock); 1344 return; 1345 } 1346 1347 pcbe_ops->pcbe_program(cp->cpu_cpc_ctx); 1348 mutex_exit(&cp->cpu_cpc_ctxlock); 1349 } 1350 1351 void 1352 kcpc_idle_restore(struct cpu *cp) 1353 { 1354 /* 1355 * The idle thread shouldn't be run anywhere else. 1356 */ 1357 ASSERT(CPU == cp); 1358 1359 /* 1360 * We must hold the CPU's context lock to ensure the context isn't freed 1361 * while we're looking at it. 
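         *
         * (These idle-thread hooks are only used when the project-private
         * tunable above has been cleared by the administrator, e.g. via
         *
         *      set kcpc_counts_include_idle=0
         *
         * in /etc/system, so that CPU-bound counters do not accumulate
         * counts while the CPU is idle.)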
1362 */ 1363 mutex_enter(&cp->cpu_cpc_ctxlock); 1364 1365 if ((cp->cpu_cpc_ctx == NULL) || 1366 (cp->cpu_cpc_ctx->kc_flags & KCPC_CTX_INVALID)) { 1367 mutex_exit(&cp->cpu_cpc_ctxlock); 1368 return; 1369 } 1370 1371 pcbe_ops->pcbe_allstop(); 1372 mutex_exit(&cp->cpu_cpc_ctxlock); 1373 } 1374 1375 /*ARGSUSED*/ 1376 static void 1377 kcpc_lwp_create(kthread_t *t, kthread_t *ct) 1378 { 1379 kcpc_ctx_t *ctx = t->t_cpc_ctx, *cctx; 1380 int i; 1381 1382 if (ctx == NULL || (ctx->kc_flags & KCPC_CTX_LWPINHERIT) == 0) 1383 return; 1384 1385 rw_enter(&kcpc_cpuctx_lock, RW_READER); 1386 if (ctx->kc_flags & KCPC_CTX_INVALID) { 1387 rw_exit(&kcpc_cpuctx_lock); 1388 return; 1389 } 1390 cctx = kcpc_ctx_alloc(KM_SLEEP); 1391 kcpc_ctx_clone(ctx, cctx); 1392 rw_exit(&kcpc_cpuctx_lock); 1393 1394 /* 1395 * Copy the parent context's kc_flags field, but don't overwrite 1396 * the child's in case it was modified during kcpc_ctx_clone. 1397 */ 1398 KCPC_CTX_FLAG_SET(cctx, ctx->kc_flags); 1399 cctx->kc_thread = ct; 1400 cctx->kc_cpuid = -1; 1401 ct->t_cpc_set = cctx->kc_set; 1402 ct->t_cpc_ctx = cctx; 1403 1404 if (cctx->kc_flags & KCPC_CTX_SIGOVF) { 1405 kcpc_set_t *ks = cctx->kc_set; 1406 /* 1407 * Our contract with the user requires us to immediately send an 1408 * overflow signal to all children if we have the LWPINHERIT 1409 * and SIGOVF flags set. In addition, all counters should be 1410 * set to UINT64_MAX, and their pic's overflow flag turned on 1411 * so that our trap() processing knows to send a signal. 1412 */ 1413 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_FREEZE); 1414 for (i = 0; i < ks->ks_nreqs; i++) { 1415 kcpc_request_t *kr = &ks->ks_req[i]; 1416 1417 if (kr->kr_flags & CPC_OVF_NOTIFY_EMT) { 1418 *(kr->kr_data) = UINT64_MAX; 1419 atomic_or_uint(&kr->kr_picp->kp_flags, 1420 KCPC_PIC_OVERFLOWED); 1421 } 1422 } 1423 ttolwp(ct)->lwp_pcb.pcb_flags |= CPC_OVERFLOW; 1424 aston(ct); 1425 } 1426 1427 installctx(ct, cctx, kcpc_save, kcpc_restore, 1428 NULL, kcpc_lwp_create, NULL, kcpc_free, NULL); 1429 } 1430 1431 /* 1432 * Counter Stoppage Theory 1433 * 1434 * The counters may need to be stopped properly at the following occasions: 1435 * 1436 * 1) An LWP exits. 1437 * 2) A thread exits. 1438 * 3) An LWP performs an exec(). 1439 * 4) A bound set is unbound. 1440 * 1441 * In addition to stopping the counters, the CPC context (a kcpc_ctx_t) may need 1442 * to be freed as well. 1443 * 1444 * Case 1: kcpc_passivate(), called via lwp_exit(), stops the counters. Later on 1445 * when the thread is freed, kcpc_free(), called by freectx(), frees the 1446 * context. 1447 * 1448 * Case 2: same as case 1 except kcpc_passivate is called from thread_exit(). 1449 * 1450 * Case 3: kcpc_free(), called via freectx() via exec(), recognizes that it has 1451 * been called from exec. It stops the counters _and_ frees the context. 1452 * 1453 * Case 4: kcpc_unbind() stops the hardware _and_ frees the context. 1454 * 1455 * CPU-bound counters are always stopped via kcpc_unbind(). 1456 */ 1457 1458 /* 1459 * We're being called to delete the context; we ensure that all associated data 1460 * structures are freed, and that the hardware is passivated if this is an exec. 1461 */ 1462 1463 /*ARGSUSED*/ 1464 void 1465 kcpc_free(kcpc_ctx_t *ctx, int isexec) 1466 { 1467 int i; 1468 kcpc_set_t *set = ctx->kc_set; 1469 1470 ASSERT(set != NULL); 1471 1472 /* 1473 * Wait for kcpc_restore() to finish before we tear things down. 
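         *
         * kcpc_restore() sets KCPC_CTX_RESTORE under kc_lock before it
         * reprograms the hardware, and clears the flag and cv_signal()s
         * kc_condv once programming is complete; waiting for that flag to
         * drop here ensures we never free the set and context out from
         * under a concurrent restorectx() of the victim thread.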
1474 */ 1475 mutex_enter(&ctx->kc_lock); 1476 while (ctx->kc_flags & KCPC_CTX_RESTORE) 1477 cv_wait(&ctx->kc_condv, &ctx->kc_lock); 1478 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1479 mutex_exit(&ctx->kc_lock); 1480 1481 if (isexec) { 1482 /* 1483 * This thread is execing, and after the exec it should not have 1484 * any performance counter context. Stop the counters properly 1485 * here so the system isn't surprised by an overflow interrupt 1486 * later. 1487 */ 1488 if (ctx->kc_cpuid != -1) { 1489 cpu_t *cp; 1490 /* 1491 * CPU-bound context; stop the appropriate CPU's ctrs. 1492 * Hold cpu_lock while examining the CPU to ensure it 1493 * doesn't go away. 1494 */ 1495 mutex_enter(&cpu_lock); 1496 cp = cpu_get(ctx->kc_cpuid); 1497 /* 1498 * The CPU could have been DR'd out, so only stop the 1499 * CPU and clear its context pointer if the CPU still 1500 * exists. 1501 */ 1502 if (cp != NULL) { 1503 mutex_enter(&cp->cpu_cpc_ctxlock); 1504 kcpc_stop_hw(ctx); 1505 mutex_exit(&cp->cpu_cpc_ctxlock); 1506 } 1507 mutex_exit(&cpu_lock); 1508 ASSERT(curthread->t_cpc_ctx == NULL); 1509 } else { 1510 int save_spl; 1511 1512 /* 1513 * Thread-bound context; stop _this_ CPU's counters. 1514 */ 1515 kpreempt_disable(); 1516 save_spl = spl_xcall(); 1517 kcpc_unprogram(ctx, B_TRUE); 1518 curthread->t_cpc_ctx = NULL; 1519 splx(save_spl); 1520 kpreempt_enable(); 1521 } 1522 1523 /* 1524 * Since we are being called from an exec and we know that 1525 * exec is not permitted via the agent thread, we should clean 1526 * up this thread's CPC state completely, and not leave dangling 1527 * CPC pointers behind. 1528 */ 1529 ASSERT(ctx->kc_thread == curthread); 1530 curthread->t_cpc_set = NULL; 1531 } 1532 1533 /* 1534 * Walk through each request in this context's set and free the PCBE's 1535 * configuration if it exists. 1536 */ 1537 for (i = 0; i < set->ks_nreqs; i++) { 1538 if (set->ks_req[i].kr_config != NULL) 1539 pcbe_ops->pcbe_free(set->ks_req[i].kr_config); 1540 } 1541 1542 kmem_free(set->ks_data, set->ks_nreqs * sizeof (uint64_t)); 1543 kcpc_ctx_free(ctx); 1544 kcpc_free_set(set); 1545 } 1546 1547 /* 1548 * Free the memory associated with a request set. 1549 */ 1550 void 1551 kcpc_free_set(kcpc_set_t *set) 1552 { 1553 int i; 1554 kcpc_request_t *req; 1555 1556 ASSERT(set->ks_req != NULL); 1557 1558 for (i = 0; i < set->ks_nreqs; i++) { 1559 req = &set->ks_req[i]; 1560 1561 if (req->kr_nattrs != 0) { 1562 kmem_free(req->kr_attr, 1563 req->kr_nattrs * sizeof (kcpc_attr_t)); 1564 } 1565 } 1566 1567 kmem_free(set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs); 1568 cv_destroy(&set->ks_condv); 1569 mutex_destroy(&set->ks_lock); 1570 kmem_free(set, sizeof (kcpc_set_t)); 1571 } 1572 1573 /* 1574 * Grab every existing context and mark it as invalid. 1575 */ 1576 void 1577 kcpc_invalidate_all(void) 1578 { 1579 kcpc_ctx_t *ctx; 1580 long hash; 1581 1582 for (hash = 0; hash < CPC_HASH_BUCKETS; hash++) { 1583 mutex_enter(&kcpc_ctx_llock[hash]); 1584 for (ctx = kcpc_ctx_list[hash]; ctx; ctx = ctx->kc_next) 1585 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1586 mutex_exit(&kcpc_ctx_llock[hash]); 1587 } 1588 } 1589 1590 /* 1591 * Interface for PCBEs to signal that an existing configuration has suddenly 1592 * become invalid. 
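 *
 * A PCBE that determines its configuration can no longer be trusted would,
 * as a sketch, simply call
 *
 *      kcpc_invalidate_config(token);
 *
 * where 'token' is the kcpc_ctx_t pointer it received through
 * pcbe_configure(). Consumers of the affected set then see EAGAIN from
 * kcpc_sample() until the set is unbound.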
1593 */ 1594 void 1595 kcpc_invalidate_config(void *token) 1596 { 1597 kcpc_ctx_t *ctx = token; 1598 1599 ASSERT(ctx != NULL); 1600 1601 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1602 } 1603 1604 /* 1605 * Called from lwp_exit() and thread_exit() 1606 */ 1607 void 1608 kcpc_passivate(void) 1609 { 1610 kcpc_ctx_t *ctx = curthread->t_cpc_ctx; 1611 kcpc_set_t *set = curthread->t_cpc_set; 1612 int save_spl; 1613 1614 if (set == NULL) 1615 return; 1616 1617 if (ctx == NULL) { 1618 /* 1619 * This thread has a set but no context; it must be a CPU-bound 1620 * set. The hardware will be stopped via kcpc_unbind() when the 1621 * process exits and closes its file descriptors with 1622 * kcpc_close(). Our only job here is to clean up this thread's 1623 * state; the set will be freed with the unbind(). 1624 */ 1625 (void) kcpc_unbind(set); 1626 /* 1627 * Unbinding a set belonging to the current thread should clear 1628 * its set pointer. 1629 */ 1630 ASSERT(curthread->t_cpc_set == NULL); 1631 return; 1632 } 1633 1634 kpreempt_disable(); 1635 save_spl = spl_xcall(); 1636 curthread->t_cpc_set = NULL; 1637 1638 /* 1639 * This thread/LWP is exiting but context switches will continue to 1640 * happen for a bit as the exit proceeds. Kernel preemption must be 1641 * disabled here to prevent a race between checking or setting the 1642 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during 1643 * a context switch. 1644 */ 1645 if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) { 1646 kcpc_unprogram(ctx, B_TRUE); 1647 KCPC_CTX_FLAG_SET(ctx, 1648 KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED); 1649 } 1650 1651 /* 1652 * We're cleaning up after this thread; ensure there are no dangling 1653 * CPC pointers left behind. The context and set will be freed by 1654 * freectx(). 1655 */ 1656 curthread->t_cpc_ctx = NULL; 1657 1658 splx(save_spl); 1659 kpreempt_enable(); 1660 } 1661 1662 /* 1663 * Assign the requests in the given set to the PICs in the context. 1664 * Returns 0 if successful, -1 on failure. 1665 */ 1666 /*ARGSUSED*/ 1667 int 1668 kcpc_assign_reqs(kcpc_set_t *set, kcpc_ctx_t *ctx) 1669 { 1670 int i; 1671 int *picnum_save; 1672 1673 ASSERT(set->ks_nreqs <= cpc_ncounters); 1674 1675 /* 1676 * Provide kcpc_tryassign() with scratch space to avoid doing an 1677 * alloc/free with every invocation. 1678 */ 1679 picnum_save = kmem_alloc(set->ks_nreqs * sizeof (int), KM_SLEEP); 1680 /* 1681 * kcpc_tryassign() blindly walks through each request in the set, 1682 * seeing if a counter can count its event. If yes, it assigns that 1683 * counter. However, that counter may have been the only capable counter 1684 * for _another_ request's event. The solution is to try every possible 1685 * request first. Note that this does not cover all solutions, as 1686 * that would require all unique orderings of requests, an n^n operation 1687 * which would be unacceptable for architectures with many counters. 1688 */ 1689 for (i = 0; i < set->ks_nreqs; i++) 1690 if (kcpc_tryassign(set, i, picnum_save) == 0) 1691 break; 1692 1693 kmem_free(picnum_save, set->ks_nreqs * sizeof (int)); 1694 if (i == set->ks_nreqs) 1695 return (-1); 1696 return (0); 1697 } 1698 1699 static int 1700 kcpc_tryassign(kcpc_set_t *set, int starting_req, int *scratch) 1701 { 1702 int i; 1703 int j; 1704 uint64_t bitmap = 0, resmap = 0; 1705 uint64_t ctrmap; 1706 1707 /* 1708 * We are attempting to assign the reqs to pics, but we may fail. 
If we 1709 * fail, we need to restore the state of the requests to what it was 1710 * when we found it, as some reqs may have been explicitly assigned to 1711 * a specific PIC beforehand. We do this by snapshotting the assignments 1712 * now and restoring from it later if we fail. 1713 * 1714 * Also we note here which counters have already been claimed by 1715 * requests with explicit counter assignments. 1716 */ 1717 for (i = 0; i < set->ks_nreqs; i++) { 1718 scratch[i] = set->ks_req[i].kr_picnum; 1719 if (set->ks_req[i].kr_picnum != -1) 1720 resmap |= (1 << set->ks_req[i].kr_picnum); 1721 } 1722 1723 /* 1724 * Walk through requests assigning them to the first PIC that is 1725 * capable. 1726 */ 1727 i = starting_req; 1728 do { 1729 if (set->ks_req[i].kr_picnum != -1) { 1730 ASSERT((bitmap & (1 << set->ks_req[i].kr_picnum)) == 0); 1731 bitmap |= (1 << set->ks_req[i].kr_picnum); 1732 if (++i == set->ks_nreqs) 1733 i = 0; 1734 continue; 1735 } 1736 1737 ctrmap = pcbe_ops->pcbe_event_coverage(set->ks_req[i].kr_event); 1738 for (j = 0; j < cpc_ncounters; j++) { 1739 if (ctrmap & (1 << j) && (bitmap & (1 << j)) == 0 && 1740 (resmap & (1 << j)) == 0) { 1741 /* 1742 * We can assign this counter because: 1743 * 1744 * 1. It can count the event (ctrmap) 1745 * 2. It hasn't been assigned yet (bitmap) 1746 * 3. It wasn't reserved by a request (resmap) 1747 */ 1748 bitmap |= (1 << j); 1749 break; 1750 } 1751 } 1752 if (j == cpc_ncounters) { 1753 for (i = 0; i < set->ks_nreqs; i++) 1754 set->ks_req[i].kr_picnum = scratch[i]; 1755 return (-1); 1756 } 1757 set->ks_req[i].kr_picnum = j; 1758 1759 if (++i == set->ks_nreqs) 1760 i = 0; 1761 } while (i != starting_req); 1762 1763 return (0); 1764 } 1765 1766 kcpc_set_t * 1767 kcpc_dup_set(kcpc_set_t *set) 1768 { 1769 kcpc_set_t *new; 1770 int i; 1771 int j; 1772 1773 new = kmem_zalloc(sizeof (*new), KM_SLEEP); 1774 new->ks_state &= ~KCPC_SET_BOUND; 1775 new->ks_flags = set->ks_flags; 1776 new->ks_nreqs = set->ks_nreqs; 1777 new->ks_req = kmem_alloc(set->ks_nreqs * sizeof (kcpc_request_t), 1778 KM_SLEEP); 1779 new->ks_data = NULL; 1780 new->ks_ctx = NULL; 1781 1782 for (i = 0; i < new->ks_nreqs; i++) { 1783 new->ks_req[i].kr_config = NULL; 1784 new->ks_req[i].kr_index = set->ks_req[i].kr_index; 1785 new->ks_req[i].kr_picnum = set->ks_req[i].kr_picnum; 1786 new->ks_req[i].kr_picp = NULL; 1787 new->ks_req[i].kr_data = NULL; 1788 (void) strncpy(new->ks_req[i].kr_event, set->ks_req[i].kr_event, 1789 CPC_MAX_EVENT_LEN); 1790 new->ks_req[i].kr_preset = set->ks_req[i].kr_preset; 1791 new->ks_req[i].kr_flags = set->ks_req[i].kr_flags; 1792 new->ks_req[i].kr_nattrs = set->ks_req[i].kr_nattrs; 1793 new->ks_req[i].kr_attr = kmem_alloc(new->ks_req[i].kr_nattrs * 1794 sizeof (kcpc_attr_t), KM_SLEEP); 1795 for (j = 0; j < new->ks_req[i].kr_nattrs; j++) { 1796 new->ks_req[i].kr_attr[j].ka_val = 1797 set->ks_req[i].kr_attr[j].ka_val; 1798 (void) strncpy(new->ks_req[i].kr_attr[j].ka_name, 1799 set->ks_req[i].kr_attr[j].ka_name, 1800 CPC_MAX_ATTR_LEN); 1801 } 1802 } 1803 1804 return (new); 1805 } 1806 1807 int 1808 kcpc_allow_nonpriv(void *token) 1809 { 1810 return (((kcpc_ctx_t *)token)->kc_flags & KCPC_CTX_NONPRIV); 1811 } 1812 1813 void 1814 kcpc_invalidate(kthread_t *t) 1815 { 1816 kcpc_ctx_t *ctx = t->t_cpc_ctx; 1817 1818 if (ctx != NULL) 1819 KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID); 1820 } 1821 1822 /* 1823 * Given a PCBE ID, attempt to load a matching PCBE module. 
The strings given
 * are used to construct PCBE names, starting with the most specific,
 * "pcbe.first.second.third.fourth" and ending with the least specific,
 * "pcbe.first".
 *
 * Returns 0 if a PCBE was successfully loaded and -1 upon error.
 */
int
kcpc_pcbe_tryload(const char *prefix, uint_t first, uint_t second, uint_t third)
{
        uint_t s[3];

        s[0] = first;
        s[1] = second;
        s[2] = third;

        return (modload_qualified("pcbe",
            "pcbe", prefix, ".", s, 3, NULL) < 0 ? -1 : 0);
}

/*
 * Create one or more CPC contexts for the given CPU with the specified counter
 * event requests.
 *
 * If the number of requested counter events is less than or equal to the
 * number of hardware counters on the CPU and they can all be assigned to the
 * counters at the same time, then a single CPC context is created.
 *
 * Otherwise, multiple CPC contexts are created to allow multiplexing more
 * counter events than existing counters onto the counters by iterating through
 * all of the CPC contexts, programming the counters with each CPC context one
 * at a time and measuring the resulting counter values. Each of the resulting
 * CPC contexts contains some number of requested counter events less than or
 * equal to the number of counters on a CPU, depending on whether all the
 * counter events can be programmed on all the counters at the same time or
 * not.
 *
 * Flags to kmem_{,z}alloc() are passed in as an argument to allow specifying
 * whether memory allocation should be non-blocking or not. The code will try
 * to allocate *whole* CPC contexts if possible. If there is any memory
 * allocation failure during the allocations needed for a given CPC context, it
 * will skip allocating that CPC context because it cannot allocate the whole
 * thing. Thus, the only time that it will end up allocating none (i.e. no CPC
 * contexts whatsoever) is when it cannot even allocate *one* whole CPC context
 * without a memory allocation failure occurring.
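 *
 * A caller-side sketch (illustrative only; building the request list,
 * error handling, and freeing of the contexts themselves are omitted):
 *
 *      kcpc_ctx_t **ctxs;
 *      size_t ctxs_sz;
 *      int nctx, i;
 *
 *      nctx = kcpc_cpu_ctx_create(cp, req_list, KM_NOSLEEP, &ctxs, &ctxs_sz);
 *      for (i = 0; i < nctx; i++) {
 *              ... program ctxs[i], let it run, then sample it ...
 *      }
 *      kmem_free(ctxs, ctxs_sz);
 *
 * Each context is programmed in turn so that more counter events than
 * hardware counters can be multiplexed across sampling intervals.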
1867 */ 1868 int 1869 kcpc_cpu_ctx_create(cpu_t *cp, kcpc_request_list_t *req_list, int kmem_flags, 1870 kcpc_ctx_t ***ctx_ptr_array, size_t *ctx_ptr_array_sz) 1871 { 1872 kcpc_ctx_t **ctx_ptrs; 1873 int nctx; 1874 int nctx_ptrs; 1875 int nreqs; 1876 kcpc_request_t *reqs; 1877 1878 if (cp == NULL || ctx_ptr_array == NULL || ctx_ptr_array_sz == NULL || 1879 req_list == NULL || req_list->krl_cnt < 1) 1880 return (-1); 1881 1882 /* 1883 * Allocate number of sets assuming that each set contains one and only 1884 * one counter event request for each counter on a CPU 1885 */ 1886 nreqs = req_list->krl_cnt; 1887 nctx_ptrs = (nreqs + cpc_ncounters - 1) / cpc_ncounters; 1888 ctx_ptrs = kmem_zalloc(nctx_ptrs * sizeof (kcpc_ctx_t *), kmem_flags); 1889 if (ctx_ptrs == NULL) 1890 return (-2); 1891 1892 /* 1893 * Fill in sets of requests 1894 */ 1895 nctx = 0; 1896 reqs = req_list->krl_list; 1897 while (nreqs > 0) { 1898 kcpc_ctx_t *ctx; 1899 kcpc_set_t *set; 1900 int subcode; 1901 1902 /* 1903 * Allocate CPC context and set for requested counter events 1904 */ 1905 ctx = kcpc_ctx_alloc(kmem_flags); 1906 set = kcpc_set_create(reqs, nreqs, 0, kmem_flags); 1907 if (set == NULL) { 1908 kcpc_ctx_free(ctx); 1909 break; 1910 } 1911 1912 /* 1913 * Determine assignment of requested counter events to specific 1914 * counters 1915 */ 1916 if (kcpc_assign_reqs(set, ctx) != 0) { 1917 /* 1918 * May not be able to assign requested counter events 1919 * to all counters since all counters may not be able 1920 * to do all events, so only do one counter event in 1921 * set of counter requests when this happens since at 1922 * least one of the counters must be able to do the 1923 * event. 1924 */ 1925 kcpc_free_set(set); 1926 set = kcpc_set_create(reqs, 1, 0, kmem_flags); 1927 if (set == NULL) { 1928 kcpc_ctx_free(ctx); 1929 break; 1930 } 1931 if (kcpc_assign_reqs(set, ctx) != 0) { 1932 #ifdef DEBUG 1933 cmn_err(CE_NOTE, "!kcpc_cpu_ctx_create: can't " 1934 "assign counter event %s!\n", 1935 set->ks_req->kr_event); 1936 #endif 1937 kcpc_free_set(set); 1938 kcpc_ctx_free(ctx); 1939 reqs++; 1940 nreqs--; 1941 continue; 1942 } 1943 } 1944 1945 /* 1946 * Allocate memory needed to hold requested counter event data 1947 */ 1948 set->ks_data = kmem_zalloc(set->ks_nreqs * sizeof (uint64_t), 1949 kmem_flags); 1950 if (set->ks_data == NULL) { 1951 kcpc_free_set(set); 1952 kcpc_ctx_free(ctx); 1953 break; 1954 } 1955 1956 /* 1957 * Configure requested counter events 1958 */ 1959 if (kcpc_configure_reqs(ctx, set, &subcode) != 0) { 1960 #ifdef DEBUG 1961 cmn_err(CE_NOTE, 1962 "!kcpc_cpu_ctx_create: can't configure " 1963 "set of counter event requests!\n"); 1964 #endif 1965 reqs += set->ks_nreqs; 1966 nreqs -= set->ks_nreqs; 1967 kmem_free(set->ks_data, 1968 set->ks_nreqs * sizeof (uint64_t)); 1969 kcpc_free_set(set); 1970 kcpc_ctx_free(ctx); 1971 continue; 1972 } 1973 1974 /* 1975 * Point set of counter event requests at this context and fill 1976 * in CPC context 1977 */ 1978 set->ks_ctx = ctx; 1979 ctx->kc_set = set; 1980 ctx->kc_cpuid = cp->cpu_id; 1981 ctx->kc_thread = curthread; 1982 1983 ctx_ptrs[nctx] = ctx; 1984 1985 /* 1986 * Update requests and how many are left to be assigned to sets 1987 */ 1988 reqs += set->ks_nreqs; 1989 nreqs -= set->ks_nreqs; 1990 1991 /* 1992 * Increment number of CPC contexts and allocate bigger array 1993 * for context pointers as needed 1994 */ 1995 nctx++; 1996 if (nctx >= nctx_ptrs) { 1997 kcpc_ctx_t **new; 1998 int new_cnt; 1999 2000 /* 2001 * Allocate more CPC contexts based on how many 2002 * 

/*
 * Return whether PCBE supports given counter event
 */
boolean_t
kcpc_event_supported(char *event)
{
	if (pcbe_ops == NULL || pcbe_ops->pcbe_event_coverage(event) == 0)
		return (B_FALSE);

	return (B_TRUE);
}
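
/*
 * Illustrative sketch (assumed usage, not taken from an in-tree caller): a
 * consumer can verify that the loaded PCBE can count an event before adding a
 * request for it.  The event name below is an example only.
 *
 *	if (kcpc_event_supported("PAPI_tot_ins") == B_FALSE)
 *		return (ENOTSUP);
 *	(void) kcpc_reqs_add(req_list, "PAPI_tot_ins", 0, CPC_COUNT_SYSTEM,
 *	    0, NULL, NULL, KM_NOSLEEP);
 */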

/*
 * Program counters on current CPU with given CPC context
 *
 * If kernel is interposing on counters to measure hardware capacity and
 * utilization, then unprogram counters for kernel *before* programming them
 * with specified CPC context.
 *
 * kcpc_{program,unprogram}() may be called either directly by a thread running
 * on the target CPU or from a cross-call from another CPU.  To protect
 * programming and unprogramming from being interrupted by cross-calls, callers
 * who execute kcpc_{program,unprogram}() should raise PIL to the level used by
 * cross-calls.
 */
void
kcpc_program(kcpc_ctx_t *ctx, boolean_t for_thread, boolean_t cu_interpose)
{
	int error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);
	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1)
		return;

	/*
	 * Unprogram counters for kernel measuring hardware capacity and
	 * utilization
	 */
	if (cu_interpose == B_TRUE) {
		cu_cpc_unprogram(CPU, &error);
	} else {
		kcpc_set_t *set = ctx->kc_set;
		int i;

		ASSERT(set != NULL);

		/*
		 * Since cu_interpose is false, we are programming CU context.
		 * In general, PCBE can continue from the state saved in the
		 * set, but it is not very reliable, so we start again from the
		 * preset value.
		 */
		for (i = 0; i < set->ks_nreqs; i++) {
			/*
			 * Reset the virtual counter value to the preset value.
			 */
			*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;

			/*
			 * Reset PCBE to the preset value.
			 */
			pcbe_ops->pcbe_configure(0, NULL,
			    set->ks_req[i].kr_preset,
			    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
		}
	}

	/*
	 * Program counters with specified CPC context
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	pcbe_ops->pcbe_program(ctx);

	/*
	 * Record that the counters were programmed, which is done differently
	 * for a thread CPC context than for a CPU CPC context
	 */
	if (for_thread == B_TRUE)
		KCPC_CTX_FLAG_CLR(ctx, KCPC_CTX_FREEZE);
	else
		CPU->cpu_cpc_ctx = ctx;
}

/*
 * Unprogram counters with given CPC context on current CPU
 *
 * If kernel is interposing on counters to measure hardware capacity and
 * utilization, then program counters for the kernel capacity and utilization
 * *after* unprogramming them for given CPC context.
 *
 * See the comment for kcpc_program() regarding the synchronization with
 * cross-calls.
 */
void
kcpc_unprogram(kcpc_ctx_t *ctx, boolean_t cu_interpose)
{
	int error;

	ASSERT(IS_HIPIL());

	/*
	 * CPC context shouldn't be NULL, its CPU field should specify current
	 * CPU or be -1 to specify any CPU when the context is bound to a
	 * thread, and preemption should be disabled
	 */
	ASSERT(ctx != NULL && (ctx->kc_cpuid == CPU->cpu_id ||
	    ctx->kc_cpuid == -1) && curthread->t_preempt > 0);

	if (ctx == NULL || (ctx->kc_cpuid != CPU->cpu_id &&
	    ctx->kc_cpuid != -1) || curthread->t_preempt < 1 ||
	    (ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) != 0) {
		return;
	}

	/*
	 * Specified CPC context to be unprogrammed should be bound to current
	 * CPU or thread
	 */
	ASSERT(CPU->cpu_cpc_ctx == ctx || curthread->t_cpc_ctx == ctx);

	/*
	 * Stop counters
	 */
	pcbe_ops->pcbe_allstop();
	KCPC_CTX_FLAG_SET(ctx, KCPC_CTX_INVALID_STOPPED);

	/*
	 * Allow kernel to interpose on counters and program them for its own
	 * use to measure hardware capacity and utilization if cu_interpose
	 * argument is true
	 */
	if (cu_interpose == B_TRUE)
		cu_cpc_program(CPU, &error);
}

/*
 * Read CPU Performance Counter (CPC) on current CPU and call specified update
 * routine with data for each counter event currently programmed on CPU
 */
int
kcpc_read(kcpc_update_func_t update_func)
{
	kcpc_ctx_t	*ctx;
	int		i;
	kcpc_request_t	*req;
	int		retval;
	kcpc_set_t	*set;

	ASSERT(IS_HIPIL());

	/*
	 * Can't grab locks or block because may be called inside dispatcher
	 */
	kpreempt_disable();

	ctx = CPU->cpu_cpc_ctx;
	if (ctx == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Read counter data from current CPU
	 */
	pcbe_ops->pcbe_sample(ctx);

	set = ctx->kc_set;
	if (set == NULL || set->ks_req == NULL) {
		kpreempt_enable();
		return (0);
	}

	/*
	 * Call update function with the caller-supplied pointer and the data
	 * for each CPC event request currently programmed on the current CPU
	 */
	req = set->ks_req;
	retval = 0;
	for (i = 0; i < set->ks_nreqs; i++) {
		int ret;

		if (req[i].kr_data == NULL)
			break;

		ret = update_func(req[i].kr_ptr, *req[i].kr_data);
		if (ret < 0)
			retval = ret;
	}

	kpreempt_enable();

	return (retval);
}
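
/*
 * Illustrative sketch of an update routine for kcpc_read().  The function
 * name and the bookkeeping it does are assumptions made for the example; the
 * in-tree consumer is the CU code, which passes its own callback.  The
 * callback receives the kr_ptr registered via kcpc_reqs_add() and the current
 * value of that counter event.
 *
 *	static int
 *	example_update(void *arg, uint64_t value)
 *	{
 *		uint64_t *totalp = arg;
 *
 *		*totalp += value;
 *		return (0);
 *	}
 *
 * and then, from code running on the target CPU at cross-call PIL:
 *
 *	(void) kcpc_read(example_update);
 */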

/*
 * Initialize list of counter event requests
 */
kcpc_request_list_t *
kcpc_reqs_init(int nreqs, int kmem_flags)
{
	kcpc_request_list_t	*req_list;
	kcpc_request_t		*reqs;

	if (nreqs < 1)
		return (NULL);

	req_list = kmem_zalloc(sizeof (kcpc_request_list_t), kmem_flags);
	if (req_list == NULL)
		return (NULL);

	reqs = kmem_zalloc(nreqs * sizeof (kcpc_request_t), kmem_flags);
	if (reqs == NULL) {
		kmem_free(req_list, sizeof (kcpc_request_list_t));
		return (NULL);
	}

	req_list->krl_list = reqs;
	req_list->krl_cnt = 0;
	req_list->krl_max = nreqs;
	return (req_list);
}

/*
 * Add counter event request to given list of counter event requests
 */
int
kcpc_reqs_add(kcpc_request_list_t *req_list, char *event, uint64_t preset,
    uint_t flags, uint_t nattrs, kcpc_attr_t *attr, void *ptr, int kmem_flags)
{
	kcpc_request_t	*req;

	if (req_list == NULL || req_list->krl_list == NULL)
		return (-1);

	ASSERT(req_list->krl_max != 0);

	/*
	 * Allocate more space if the list is already full
	 */
	if (req_list->krl_cnt >= req_list->krl_max) {
		kcpc_request_t	*new;
		kcpc_request_t	*old;

		old = req_list->krl_list;
		new = kmem_zalloc((req_list->krl_max +
		    cpc_ncounters) * sizeof (kcpc_request_t), kmem_flags);
		if (new == NULL)
			return (-2);

		req_list->krl_list = new;
		bcopy(old, req_list->krl_list,
		    req_list->krl_cnt * sizeof (kcpc_request_t));
		kmem_free(old, req_list->krl_max * sizeof (kcpc_request_t));
		req_list->krl_max += cpc_ncounters;
	}

	/*
	 * Fill in request as much as possible now, but some fields will need
	 * to be set when request is assigned to a set.
	 */
	req = &req_list->krl_list[req_list->krl_cnt];
	req->kr_config = NULL;
	req->kr_picnum = -1;	/* have CPC pick this */
	req->kr_index = -1;	/* set when assigning request to set */
	req->kr_data = NULL;	/* set when configuring request */
	(void) strcpy(req->kr_event, event);
	req->kr_preset = preset;
	req->kr_flags = flags;
	req->kr_nattrs = nattrs;
	req->kr_attr = attr;
	/*
	 * Keep pointer given by caller to give to update function when this
	 * counter event is sampled/read
	 */
	req->kr_ptr = ptr;

	req_list->krl_cnt++;

	return (0);
}

/*
 * Reset list of CPC event requests so its space can be used for another set
 * of requests
 */
int
kcpc_reqs_reset(kcpc_request_list_t *req_list)
{
	/*
	 * Return when pointer to request list structure or request is NULL or
	 * when max requests is less than or equal to 0
	 */
	if (req_list == NULL || req_list->krl_list == NULL ||
	    req_list->krl_max <= 0)
		return (-1);

	/*
	 * Zero out requests and number of requests used
	 */
	bzero(req_list->krl_list, req_list->krl_max * sizeof (kcpc_request_t));
	req_list->krl_cnt = 0;
	return (0);
}

/*
 * Free given list of counter event requests
 */
int
kcpc_reqs_fini(kcpc_request_list_t *req_list)
{
	kmem_free(req_list->krl_list,
	    req_list->krl_max * sizeof (kcpc_request_t));
	kmem_free(req_list, sizeof (kcpc_request_list_t));
	return (0);
}
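
/*
 * Illustrative sketch of the request list lifecycle (assumed usage; the event
 * name and list size are examples only):
 *
 *	kcpc_request_list_t *rl;
 *
 *	rl = kcpc_reqs_init(cpc_ncounters, KM_SLEEP);
 *	if (rl == NULL)
 *		return (ENOMEM);
 *	(void) kcpc_reqs_add(rl, "PAPI_tot_cyc", 0,
 *	    CPC_COUNT_SYSTEM | CPC_COUNT_USER, 0, NULL, NULL, KM_SLEEP);
 *	...	(pass the list to kcpc_cpu_ctx_create() for each CPU)
 *	(void) kcpc_reqs_reset(rl);	(reuse the space for another set)
 *	(void) kcpc_reqs_fini(rl);	(or free it when done)
 */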

/*
 * Create set of given counter event requests
 */
static kcpc_set_t *
kcpc_set_create(kcpc_request_t *reqs, int nreqs, int set_flags, int kmem_flags)
{
	int		i;
	kcpc_set_t	*set;

	/*
	 * Allocate set and assign number of requests in set and flags
	 */
	set = kmem_zalloc(sizeof (kcpc_set_t), kmem_flags);
	if (set == NULL)
		return (NULL);

	if (nreqs < cpc_ncounters)
		set->ks_nreqs = nreqs;
	else
		set->ks_nreqs = cpc_ncounters;

	set->ks_flags = set_flags;

	/*
	 * Allocate requests needed, copy requests into set, and set index into
	 * data for each request (which may change when we assign requested
	 * counter events to counters)
	 */
	set->ks_req = (kcpc_request_t *)kmem_zalloc(sizeof (kcpc_request_t) *
	    set->ks_nreqs, kmem_flags);
	if (set->ks_req == NULL) {
		kmem_free(set, sizeof (kcpc_set_t));
		return (NULL);
	}

	bcopy(reqs, set->ks_req, sizeof (kcpc_request_t) * set->ks_nreqs);

	for (i = 0; i < set->ks_nreqs; i++)
		set->ks_req[i].kr_index = i;

	return (set);
}

/*
 * Stop counters on current CPU.
 *
 * If preserve_context is true, the caller is interested in the CPU's CPC
 * context and wants it to be preserved.
 *
 * If preserve_context is false, the caller does not need the CPU's CPC context
 * to be preserved, so it is set to NULL.
 */
static void
kcpc_cpustop_func(uintptr_t arg1, uintptr_t arg2 __unused)
{
	boolean_t preserve_context;

	kpreempt_disable();

	preserve_context = (boolean_t)arg1;

	/*
	 * Someone already stopped this context before us, so there is nothing
	 * to do.
	 */
	if (CPU->cpu_cpc_ctx == NULL) {
		kpreempt_enable();
		return;
	}

	kcpc_unprogram(CPU->cpu_cpc_ctx, B_TRUE);

	/*
	 * If CU does not use counters, then clear the CPU's CPC context.
	 * If the caller requested to preserve the context, it should disable
	 * CU first, so there should be no CU context now.
	 */
	ASSERT(!preserve_context || !CU_CPC_ON(CPU));
	if (!preserve_context && CPU->cpu_cpc_ctx != NULL && !CU_CPC_ON(CPU))
		CPU->cpu_cpc_ctx = NULL;

	kpreempt_enable();
}

/*
 * Stop counters on given CPU and set its CPC context to NULL unless
 * preserve_context is true.
 */
void
kcpc_cpu_stop(cpu_t *cp, boolean_t preserve_context)
{
	cpu_call(cp, kcpc_cpustop_func, preserve_context, 0);
}

/*
 * Program the context on the current CPU
 */
static void
kcpc_remoteprogram_func(uintptr_t arg1, uintptr_t arg2)
{
	kcpc_ctx_t *ctx = (kcpc_ctx_t *)arg1;
	boolean_t for_thread = (boolean_t)arg2;

	ASSERT(ctx != NULL);

	kpreempt_disable();
	kcpc_program(ctx, for_thread, B_TRUE);
	kpreempt_enable();
}

/*
 * Program counters on given CPU
 */
void
kcpc_cpu_program(cpu_t *cp, kcpc_ctx_t *ctx)
{
	cpu_call(cp, kcpc_remoteprogram_func, (uintptr_t)ctx,
	    (uintptr_t)B_FALSE);
}

char *
kcpc_list_attrs(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_attrs());
}

char *
kcpc_list_events(uint_t pic)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_list_events(pic));
}

uint_t
kcpc_pcbe_capabilities(void)
{
	ASSERT(pcbe_ops != NULL);

	return (pcbe_ops->pcbe_caps);
}

int
kcpc_pcbe_loaded(void)
{
	return (pcbe_ops == NULL ? -1 : 0);
}
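
/*
 * Illustrative sketch (assumed usage, not taken from an in-tree caller): a
 * consumer should make sure a PCBE is loaded before asking for its event or
 * attribute lists, since the accessors above assert that pcbe_ops is
 * non-NULL.
 *
 *	uint_t pic;
 *
 *	if (kcpc_init() != 0 || kcpc_pcbe_loaded() != 0)
 *		return (ENOTSUP);
 *
 *	for (pic = 0; pic < cpc_ncounters; pic++) {
 *		char *events = kcpc_list_events(pic);
 *		...	(parse the event list for this counter)
 *	}
 */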